test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
genericValidation.py
Go to the documentation of this file.
1 import os
2 import re
3 import json
4 import globalDictionaries
5 import configTemplates
6 from dataset import Dataset
7 from helperFunctions import replaceByMap, addIndex, getCommandOutput2
8 from plottingOptions import PlottingOptions
9 from TkAlExceptions import AllInOneError
10 
11 
13  defaultReferenceName = "DEFAULT"
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[], addneedpackages=[]):
    """
    Set up the state shared by all validations.

    Arguments:
    - `valName`: name of this validation; together with `valType` it
                 selects the config section "<valType>:<valName>"
    - `alignment`: object stored as `self.alignmentToValidate`
    - `config`: configuration object; `getGeneral`, `getResultingSection`,
                `has_option`, `get` and `checkInput` are called on it
    - `valType`: string which specifies the type of validation
    - `addDefaults`: additional default options (merged over the built-in
                     defaults "jobmode", "cmssw" and "parallelJobs")
    - `addMandatories`: additional mandatory options
    - `addneedpackages`: additional packages that must exist below
                         `<cmssw>/src` or `<release base>/src`

    Raises `AllInOneError` if more than 40 parallel jobs are requested,
    if the cmssw path contains a backslash or a single quote, if the
    cmssw area does not exist, if its scram environment cannot be read,
    if a needed package is missing, or if "AutoAlternates" is not a
    valid boolean.
    """
    # NOTE: the mutable default arguments are kept for interface
    # compatibility; they are only read here, never modified.
    import random
    self.name = valName
    self.valType = valType
    self.alignmentToValidate = alignment
    self.general = config.getGeneral()
    # integer bound: random.randint does not accept floats on recent Pythons
    self.randomWorkdirPart = "%0i"%random.randint(1, int(10e9))
    self.configFiles = []
    self.filesToCompare = {}
    self.config = config

    section = valType+":"+self.name

    defaults = {"jobmode": self.general["jobmode"],
                "cmssw": os.environ['CMSSW_BASE'],
                "parallelJobs": "1"
               }
    defaults.update(addDefaults)
    mandatories = list(addMandatories)
    needpackages = ["Alignment/OfflineValidation"] + list(addneedpackages)
    theUpdate = config.getResultingSection(section,
                                           defaultDict = defaults,
                                           demandPars = mandatories)
    self.general.update(theUpdate)
    self.jobmode = self.general["jobmode"]
    self.NJobs = int(self.general["parallelJobs"])

    # limit maximum number of parallel jobs to 40
    # (each output file is approximately 20MB)
    maximumNumberJobs = 40
    if self.NJobs > maximumNumberJobs:
        msg = ("Maximum allowed number of parallel jobs "
               +str(maximumNumberJobs)+" exceeded!!!")
        raise AllInOneError(msg)

    self.cmssw = self.general["cmssw"]
    # backslash and single quote would break the quoted shell command below
    badcharacters = r"\'"
    if any(character in self.cmssw for character in badcharacters):
        raise AllInOneError("The bad characters " + badcharacters + " are not allowed in the cmssw\n"
                            "path name.  If you really have it in such a ridiculously named location,\n"
                            "try making a symbolic link somewhere with a decent name.")
    try:
        os.listdir(self.cmssw)
    except OSError:
        raise AllInOneError("Your cmssw release " + self.cmssw + ' does not exist')

    if self.cmssw == os.environ["CMSSW_BASE"]:
        # same area as the current environment: reuse its variables
        self.scramarch = os.environ["SCRAM_ARCH"]
        self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
    else:
        # different area: ask scram for its runtime environment
        command = ("cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
                   ' && echo "$CMSSW_BASE\n$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
        commandoutput = getCommandOutput2(command).split('\n')
        if len(commandoutput) < 3:
            # report a readable error instead of an IndexError below
            raise AllInOneError("Could not determine the scram environment of "
                                + self.cmssw)
        self.cmssw = commandoutput[0]
        self.scramarch = commandoutput[1]
        self.cmsswreleasebase = commandoutput[2]

    # locate every needed package, preferring the local area over the release
    self.packages = {}
    for package in needpackages:
        for placetolook in self.cmssw, self.cmsswreleasebase:
            pkgpath = os.path.join(placetolook, "src", package)
            if os.path.exists(pkgpath):
                self.packages[package] = pkgpath
                break
        else:
            raise AllInOneError("Package {} does not exist in {} or {}!".format(package, self.cmssw, self.cmsswreleasebase))

    self.AutoAlternates = True
    if config.has_option("alternateTemplates","AutoAlternates"):
        try:
            # json.loads accepts exactly "true"/"false" after lower-casing
            self.AutoAlternates = json.loads(config.get("alternateTemplates","AutoAlternates").lower())
        except ValueError:
            raise AllInOneError("AutoAlternates needs to be true or false, not %s" % config.get("alternateTemplates","AutoAlternates"))

    # list(...) so the concatenation also works when keys() is a view
    knownOpts = list(defaults.keys())+mandatories
    ignoreOpts = []
    config.checkInput(section,
                      knownSimpleOptions = knownOpts,
                      ignoreOptions = ignoreOpts)
95 
def getRepMap(self, alignment = None):
    """
    Build the replacement map used to fill the templates.

    Arguments:
    - `alignment`: object providing `getRepMap()`, `getConditions()` and
                   `name`; defaults to `self.alignmentToValidate`

    The map starts from `PlottingOptions(self.config, self.valType)` (or
    an empty dict if that raises `KeyError`) and is then updated, in this
    order, with the alignment's map, `self.general`, the validation
    specific entries below and `self.packages`. Later updates win.
    """
    if alignment is None:
        alignment = self.alignmentToValidate
    try:
        result = PlottingOptions(self.config, self.valType)
    except KeyError:
        result = {}
    result.update(alignment.getRepMap())
    result.update( self.general )
    result.update({
        "workdir": os.path.join(self.general["workdir"],
                                self.randomWorkdirPart),
        "datadir": self.general["datadir"],
        "logdir": self.general["logdir"],
        "CommandLineTemplate": ("#run configfile and post-proccess it\n"
                                "cmsRun %(cfgFile)s\n"
                                "%(postProcess)s "),
        "CMSSW_BASE": self.cmssw,
        "SCRAM_ARCH": self.scramarch,
        "CMSSW_RELEASE_BASE": self.cmsswreleasebase,
        "alignmentName": alignment.name,
        # listed once: a repeated key only evaluated getConditions() twice
        "condLoad": alignment.getConditions(),
        })
    result.update(self.packages)
    return result
122 
def getCompareStrings( self, requestId = None, plain = False ):
    """
    Return the strings describing the files registered for comparison.

    Arguments:
    - `requestId`: if None, a dict {validationId: string} for all entries
                   of `self.filesToCompare` is returned. Otherwise only
                   the string whose validationId equals the part of
                   `requestId` after its last "." is returned; a
                   `requestId` without "." gets
                   `GenericValidation.defaultReferenceName` appended.
    - `plain`: if true the string is only the file name, otherwise it is
               "<file>=<title>|<color>|<style>" filled from the
               alignment's replacement map.

    File names starting with "/castor/" are prefixed with "rfio:", those
    starting with "/store/" with "root://eoscms.cern.ch//eos/cms".

    Raises `AllInOneError` if the requested id is not available.
    """
    result = {}
    repMap = self.alignmentToValidate.getRepMap()
    for validationId, fileName in self.filesToCompare.items():
        repMap["file"] = fileName
        if repMap["file"].startswith( "/castor/" ):
            repMap["file"] = "rfio:%(file)s"%repMap
        elif repMap["file"].startswith( "/store/" ):
            repMap["file"] = "root://eoscms.cern.ch//eos/cms%(file)s"%repMap
        if plain:
            result[validationId]=repMap["file"]
        else:
            result[validationId]= "%(file)s=%(title)s|%(color)s|%(style)s"%repMap
    if requestId is None:
        return result

    if "." not in requestId:
        requestId += ".%s"%GenericValidation.defaultReferenceName
    # only the part after the last dot identifies the reference object
    referenceName = requestId.split(".")[-1]
    if referenceName not in result:
        msg = ("could not find %s in reference Objects!"
               %referenceName)
        raise AllInOneError(msg)
    return result[ referenceName ]
146 
def createFiles(self, fileContents, path, repMap = None, repMaps = None):
    """
    Write the given file contents below `path` and return the file paths.

    Arguments:
    - `fileContents`: dict mapping file names to their (template) content
    - `path`: directory in which the files are created
    - `repMap`: single map for all files
    - `repMaps`: a dict, with the filenames as the keys

    For each file name one file per entry of
    `addIndex(filePath, self.NJobs)` is written. If a map is available,
    its "nIndex" entry is set to the running index (the map is modified
    in place) and the content is passed through `replaceByMap`.

    Returns the list of un-indexed file paths.
    Raises `AllInOneError` if both `repMap` and `repMaps` are given.
    """
    if repMap is not None and repMaps is not None:
        raise AllInOneError("createFiles can only take repMap or repMaps (or neither), not both")
    result = []
    for fileName in fileContents:
        filePath = os.path.join(path, fileName)
        result.append(filePath)

        for (i, filePathi) in enumerate(addIndex(filePath, self.NJobs)):
            fileContentsi = fileContents[ fileName ]
            if repMaps is not None:
                repMap = repMaps[fileName]
            if repMap is not None:
                repMap.update({"nIndex": str(i)})
                fileContentsi = replaceByMap(fileContentsi, repMap)
            # open only once the content is ready, and always close the
            # handle: a failing substitution no longer leaves an empty,
            # unclosed file behind
            with open( filePathi, "w" ) as theFile:
                theFile.write( fileContentsi )

    return result
169 
def createConfiguration(self, fileContents, path, schedule = None, repMap = None, repMaps = None):
    """
    Create the configuration files and remember them in `self.configFiles`.

    Arguments:
    - `fileContents`, `path`, `repMap`, `repMaps`: handed over to
      `GenericValidation.createFiles`
    - `schedule`: optional list of config file names (relative to `path`).
                  If given it must contain exactly the generated files;
                  `self.configFiles` then takes the order of `schedule`.

    Returns `self.configFiles`.
    Raises `AllInOneError` if `schedule` and the generated files differ.
    """
    self.configFiles = GenericValidation.createFiles(
        self, fileContents, path, repMap = repMap, repMaps = repMaps)
    if schedule == None:
        # nothing to reorder: keep the order in which the files were created
        return self.configFiles

    scheduledPaths = [os.path.join( path, entry) for entry in schedule]
    # every scheduled file must have been generated ...
    for scheduled in scheduledPaths:
        if scheduled not in self.configFiles:
            raise AllInOneError("scheduled %s missing in generated configfiles: %s"
                                %(scheduled, self.configFiles))
    # ... and every generated file must be scheduled
    for generated in self.configFiles:
        if generated not in scheduledPaths:
            raise AllInOneError("generated configuration %s not scheduled: %s"
                                %(generated, scheduledPaths))
    self.configFiles = scheduledPaths
    return self.configFiles
187 
def createScript(self, fileContents, path, downloadFiles=[], repMap = None, repMaps = None):
    """
    Create the script files, make them executable and return their paths.

    The files are written by `GenericValidation.createFiles` and stored
    in `self.scriptFiles`; every indexed variant given by
    `addIndex(script, self.NJobs)` gets mode 0755.
    `downloadFiles` is accepted but not used here.
    """
    self.scriptFiles = GenericValidation.createFiles(
        self, fileContents, path, repMap = repMap, repMaps = repMaps)
    executableMode = 0o755
    indexedScripts = (indexed
                      for baseScript in self.scriptFiles
                      for indexed in addIndex(baseScript, self.NJobs))
    for indexed in indexedScripts:
        os.chmod(indexed, executableMode)
    return self.scriptFiles
195 
def createCrabCfg(self, fileContents, path ):
    """
    Write the crab configuration files and return their paths.

    The files are created by `GenericValidation.createFiles` and stored
    in `self.crabConfigFiles`.
    Raises `AllInOneError` if more than one parallel job is configured.
    """
    runsInParallel = self.NJobs > 1
    if runsInParallel:
        raise AllInOneError("jobmode 'crab' not supported for parallel validation."
                            " Please set parallelJobs = 1.")
    written = GenericValidation.createFiles(self, fileContents, path)
    self.crabConfigFiles = written
    return self.crabConfigFiles
204 
205 
207  """
208  Subclass of `GenericValidation` which is the base for validations using
209  datasets.
210  """
211 
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[], addneedpackages=[]):
    """
    This method adds additional items to the `self.general` dictionary
    which are only needed for validations using datasets.

    Arguments:
    - `valName`: String which identifies individual validation instances
    - `alignment`: `Alignment` instance to validate
    - `config`: `BetterConfigParser` instance which includes the
                configuration of the validations
    - `valType`: String which specifies the type of validation
    - `addDefaults`: Dictionary which contains default values for individual
                     validations in addition to the general default values
    - `addMandatories`: List which contains mandatory parameters for
                        individual validations in addition to the general
                        mandatory parameters
    - `addneedpackages`: List of additional packages, passed on unchanged
                         to `GenericValidation.__init__`

    Raises `AllInOneError` if parallel jobs are requested without a
    `maxevents` limit, if a predefined dataset is used in crab mode, if
    the lower run limit is greater than the upper one, or if the dataset
    snippet cannot be created.
    """

    # run/time selection options default to "not set"
    defaults = {"runRange": "",
                "firstRun": "",
                "lastRun": "",
                "begin": "",
                "end": "",
                "JSON": ""
               }
    defaults.update(addDefaults)
    mandatories = [ "dataset", "maxevents" ]
    mandatories += addMandatories
    needpackages = addneedpackages
    GenericValidation.__init__(self, valName, alignment, config, valType, defaults, mandatories, needpackages)

    # if maxevents is not specified, cannot calculate number of events for
    # each parallel job, and therefore running only a single job
    if int( self.general["maxevents"] ) == -1 and self.NJobs > 1:
        msg = ("Maximum number of events (maxevents) not specified: "
               "cannot use parallel jobs.")
        raise AllInOneError(msg)

    # true only outside crab mode and when no JSON, run or time
    # restriction is configured
    tryPredefinedFirst = (not self.jobmode.split( ',' )[0] == "crab" and self.general["JSON"] == ""
                          and self.general["firstRun"] == "" and self.general["lastRun"] == ""
                          and self.general["begin"] == "" and self.general["end"] == "")

    # globalDictionaries.usedDatasets caches the Dataset objects as
    # usedDatasets[dataset name][cmssw][tryPredefinedFirst]
    if self.general["dataset"] not in globalDictionaries.usedDatasets:
        globalDictionaries.usedDatasets[self.general["dataset"]] = {}

    if self.cmssw not in globalDictionaries.usedDatasets[self.general["dataset"]]:
        # a non-empty entry means the dataset is already known for another cmssw
        if globalDictionaries.usedDatasets[self.general["dataset"]] != {}:
            print ("Warning: you use the same dataset '%s' in multiple cmssw releases.\n"
                   "This is allowed, but make sure it's not a mistake") % self.general["dataset"]
        globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw] = {False: None, True: None}

    if globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] is None:
        dataset = Dataset(
            self.general["dataset"], tryPredefinedFirst = tryPredefinedFirst,
            cmssw = self.cmssw, cmsswrelease = self.cmsswreleasebase )
        globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] = dataset
        if tryPredefinedFirst and not dataset.predefined():                              #No point finding the data twice in that case
            globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][False] = dataset

    self.dataset = globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst]
    self.general["magneticField"] = self.dataset.magneticField()
    self.general["defaultMagneticField"] = "MagneticField"
    if self.general["magneticField"] == "unknown":
        print "Could not get the magnetic field for this dataset."
        print "Using the default: ", self.general["defaultMagneticField"]
        # placeholder referring to the "defaultMagneticField" entry set above
        # (presumably resolved later by replaceByMap -- confirm)
        self.general["magneticField"] = '.oO[defaultMagneticField]Oo.'

    if not self.jobmode.split( ',' )[0] == "crab":
        try:
            # NOTE(review): self.needParentFiles is not set in this method;
            # presumably provided by the concrete validation class -- confirm
            self.general["datasetDefinition"] = self.dataset.datasetSnippet(
                jsonPath = self.general["JSON"],
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                parent = self.needParentFiles )
        except AllInOneError as e:
            # prefix the error with the config section it belongs to
            msg = "In section [%s:%s]: "%(valType, self.name)
            msg += str(e)
            raise AllInOneError(msg)
    else:
        # crab mode
        if self.dataset.predefined():
            msg = ("For jobmode 'crab' you cannot use predefined datasets "
                   "(in your case: '%s')."%( self.dataset.name() ))
            raise AllInOneError( msg )
        try:
            # in crab mode "parallelJobs" has to be given explicitly
            theUpdate = config.getResultingSection(valType+":"+self.name,
                                                   demandPars = ["parallelJobs"])
        except AllInOneError as e:
            # drop the last character of the original message
            # (presumably its final period -- confirm) before extending it
            msg = str(e)[:-1]+" when using 'jobmode: crab'."
            raise AllInOneError(msg)
        self.general.update(theUpdate)
        if self.general["begin"] or self.general["end"]:
            # a time range is given: let the dataset convert it to runs
            ( self.general["begin"],
              self.general["end"],
              self.general["firstRun"],
              self.general["lastRun"] ) = self.dataset.convertTimeToRun(
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                shortTuple = False)
            if self.general["begin"] == None:
                self.general["begin"] = ""
            if self.general["end"] == None:
                self.general["end"] = ""
            self.general["firstRun"] = str( self.general["firstRun"] )
            self.general["lastRun"] = str( self.general["lastRun"] )
        # only one limit given: take the other one from the dataset's run list
        if ( not self.general["firstRun"] ) and \
               ( self.general["end"] or self.general["lastRun"] ):
            self.general["firstRun"] = str(
                self.dataset.runList()[0]["run_number"])
        if ( not self.general["lastRun"] ) and \
               ( self.general["begin"] or self.general["firstRun"] ):
            self.general["lastRun"] = str(
                self.dataset.runList()[-1]["run_number"])
        if self.general["firstRun"] and self.general["lastRun"]:
            if int(self.general["firstRun"]) > int(self.general["lastRun"]):
                msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                        "chosen is greater than the upper time/runrange limit "
                        "('end'/'lastRun').")
                raise AllInOneError( msg )
            self.general["runRange"] = (self.general["firstRun"]
                                        + '-' + self.general["lastRun"])
        try:
            self.general["datasetDefinition"] = self.dataset.datasetSnippet(
                jsonPath = self.general["JSON"],
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                crab = True )
        except AllInOneError as e:
            # prefix the error with the config section it belongs to
            msg = "In section [%s:%s]: "%(valType, self.name)
            msg += str( e )
            raise AllInOneError( msg )
349 
def getRepMap(self, alignment = None):
    """
    Extend the generic replacement map by the output and result file names.

    The names are built from `self.outputBaseName` / `self.resultBaseName`
    and `self.name`, passed through `replaceByMap` with the generic map
    and through `os.path.expandvars`. Both the final names and the
    per-job lists from `addIndex(..., self.NJobs)` are stored.
    """
    result = GenericValidation.getRepMap(self, alignment)

    outputName = "%s_%s_.oO[name]Oo..root" % (self.outputBaseName, self.name)
    outputfile = os.path.expandvars(replaceByMap(outputName, result))

    resultName = "%s_%s_.oO[name]Oo..root" % (self.resultBaseName, self.name)
    resultPath = "/store/caf/user/$USER/.oO[eosdir]Oo./" + resultName
    resultfile = os.path.expandvars(replaceByMap(resultPath, result))

    # "resultFile"/"outputFile" select the entry of the per-job list that
    # belongs to the job index "nIndex"
    fileEntries = {
        "resultFile": ".oO[resultFiles[.oO[nIndex]Oo.]]Oo.",
        "resultFiles": addIndex(resultfile, self.NJobs),
        "finalResultFile": resultfile,
        "outputFile": ".oO[outputFiles[.oO[nIndex]Oo.]]Oo.",
        "outputFiles": addIndex(outputfile, self.NJobs),
        "finalOutputFile": outputfile
        }
    result.update(fileEntries)
    return result
367 
def createScript(self, path, template = configTemplates.scriptTemplate, downloadFiles=[], repMap = None, repMaps = None):
    """
    Create the shell script which runs all config files of this validation.

    The script is named "<scriptBaseName>.<name>.<alignment name>.sh".
    If neither `repMap` nor `repMaps` is given, `self.getRepMap()` is
    used and its "CommandLine" entry is filled with one
    "CommandLineTemplate" block per entry of `self.configFiles`.
    The actual writing is done by `GenericValidation.createScript`,
    whose return value is returned.
    """
    nameParts = (self.scriptBaseName, self.name,
                 self.alignmentToValidate.name )
    scriptName = "%s.%s.%s.sh"%nameParts
    if repMap is None and repMaps is None:
        repMap = self.getRepMap()
        # one command block per config file, concatenated in order
        repMap["CommandLine"] = "".join(
            repMap["CommandLineTemplate"]%{
                "cfgFile":addIndex(cfg, self.NJobs, ".oO[nIndex]Oo."),
                "postProcess":""
                }
            for cfg in self.configFiles)
    return GenericValidation.createScript(self, {scriptName: template}, path,
                                          downloadFiles = downloadFiles,
                                          repMap = repMap, repMaps = repMaps)
381 
def createCrabCfg(self, path, crabCfgBaseName):
    """
    Method which creates a `crab.cfg` for a validation on datasets.

    Arguments:
    - `path`: Path at which the file will be stored.
    - `crabCfgBaseName`: String which depends on the actual type of
                         validation calling this method.

    The crab working directory (the config name without ".cfg") is also
    stored in `self.crabWorkingDir`.
    Raises `AllInOneError` if the dataset is neither "mc" nor "data".
    """
    crabCfgName = "crab.%s.%s.%s.cfg"%( crabCfgBaseName, self.name,
                                        self.alignmentToValidate.name )
    jobmodeParts = self.jobmode.split( ',' )

    repMap = self.getRepMap()
    repMap["script"] = "dummy_script.sh"
    # repMap["crabOutputDir"] = os.path.basename( path )
    workingDir = crabCfgName.split( '.cfg' )[0]
    repMap["crabWorkingDir"] = workingDir
    self.crabWorkingDir = repMap["crabWorkingDir"]
    repMap["numberOfJobs"] = self.general["parallelJobs"]
    repMap["cfgFile"] = self.configFiles[0]
    # the queue is whatever follows "-q" in the second jobmode field
    repMap["queue"] = jobmodeParts[1].split( '-q' )[1]

    if self.dataset.dataType() == "mc":
        repMap["McOrData"] = "events = .oO[nEvents]Oo."
    elif self.dataset.dataType() == "data":
        repMap["McOrData"] = "lumis = -1"
        if jobmodeParts[0] == "crab":
            print ("For jobmode 'crab' the parameter 'maxevents' will be "
                   "ignored and all events will be processed.")
    else:
        raise AllInOneError("Unknown data type!  Can't run in crab mode")

    filledTemplate = replaceByMap( configTemplates.crabCfgTemplate, repMap )
    return GenericValidation.createCrabCfg( self, {crabCfgName: filledTemplate}, path )
def replaceByMap
— Helpers —############################
double split
Definition: MVATrainer.cc:139