CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
genericValidation.py
Go to the documentation of this file.
1 import os
2 import re
3 import json
4 import globalDictionaries
5 import configTemplates
6 from dataset import Dataset
7 from helperFunctions import replaceByMap, addIndex, getCommandOutput2
8 from TkAlExceptions import AllInOneError
9 
10 
12  defaultReferenceName = "DEFAULT"
13  def __init__(self, valName, alignment, config, valType,
14  addDefaults = {}, addMandatories=[]):
15  import random
16  self.name = valName
17  self.alignmentToValidate = alignment
18  self.general = config.getGeneral()
19  self.randomWorkdirPart = "%0i"%random.randint(1,10e9)
20  self.configFiles = []
21  self.filesToCompare = {}
22  self.config = config
23 
24  defaults = {"jobmode": self.general["jobmode"],
25  "cmssw": os.environ['CMSSW_BASE'],
26  "parallelJobs": "1"
27  }
28  defaults.update(addDefaults)
29  mandatories = []
30  mandatories += addMandatories
31  theUpdate = config.getResultingSection(valType+":"+self.name,
32  defaultDict = defaults,
33  demandPars = mandatories)
34  self.general.update(theUpdate)
35  self.jobmode = self.general["jobmode"]
36  self.NJobs = int(self.general["parallelJobs"])
37 
38  # limit maximum number of parallel jobs to 40
39  # (each output file is approximately 20MB)
40  maximumNumberJobs = 40
41  if self.NJobs > maximumNumberJobs:
42  msg = ("Maximum allowed number of parallel jobs "
43  +str(maximumNumberJobs)+" exceeded!!!")
44  raise AllInOneError(msg)
45 
46  self.cmssw = self.general["cmssw"]
47  badcharacters = r"\'"
48  for character in badcharacters:
49  if character in self.cmssw:
50  raise AllInOneError("The bad characters " + badcharacters + " are not allowed in the cmssw\n"
51  "path name. If you really have it in such a ridiculously named location,\n"
52  "try making a symbolic link somewhere with a decent name.")
53  try:
54  os.listdir(self.cmssw)
55  except OSError:
56  raise AllInOneError("Your cmssw release " + self.cmssw + ' does not exist')
57 
58  if self.cmssw == os.environ["CMSSW_BASE"]:
59  self.scramarch = os.environ["SCRAM_ARCH"]
60  self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
61  else:
62  command = ("cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
63  ' && echo "$CMSSW_BASE\n$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
64  commandoutput = getCommandOutput2(command).split('\n')
65  self.cmssw = commandoutput[0]
66  self.scramarch = commandoutput[1]
67  self.cmsswreleasebase = commandoutput[2]
68 
69  self.AutoAlternates = True
70  if config.has_option("alternateTemplates","AutoAlternates"):
71  try:
72  self.AutoAlternates = json.loads(config.get("alternateTemplates","AutoAlternates").lower())
73  except ValueError:
74  raise AllInOneError("AutoAlternates needs to be true or false, not %s" % config.get("alternateTemplates","AutoAlternates"))
75 
76  knownOpts = defaults.keys()+mandatories
77  ignoreOpts = []
78  config.checkInput(valType+":"+self.name,
79  knownSimpleOptions = knownOpts,
80  ignoreOptions = ignoreOpts)
81 
82  def getRepMap(self, alignment = None):
83  if alignment == None:
84  alignment = self.alignmentToValidate
85  result = alignment.getRepMap()
86  result.update( self.general )
87  result.update({
88  "workdir": os.path.join(self.general["workdir"],
89  self.randomWorkdirPart),
90  "datadir": self.general["datadir"],
91  "logdir": self.general["logdir"],
92  "CommandLineTemplate": ("#run configfile and post-proccess it\n"
93  "cmsRun %(cfgFile)s\n"
94  "%(postProcess)s "),
95  "CMSSW_BASE": self.cmssw,
96  "SCRAM_ARCH": self.scramarch,
97  "CMSSW_RELEASE_BASE": self.cmsswreleasebase,
98  "alignmentName": alignment.name,
99  "condLoad": alignment.getConditions(),
100  "condLoad": alignment.getConditions(),
101  })
102  return result
103 
104  def getCompareStrings( self, requestId = None, plain = False ):
105  result = {}
106  repMap = self.alignmentToValidate.getRepMap()
107  for validationId in self.filesToCompare:
108  repMap["file"] = self.filesToCompare[ validationId ]
109  if repMap["file"].startswith( "/castor/" ):
110  repMap["file"] = "rfio:%(file)s"%repMap
111  elif repMap["file"].startswith( "/store/" ):
112  repMap["file"] = "root://eoscms.cern.ch//eos/cms%(file)s"%repMap
113  if plain:
114  result[validationId]=repMap["file"]
115  else:
116  result[validationId]= "%(file)s=%(title)s|%(color)s|%(style)s"%repMap
117  if requestId == None:
118  return result
119  else:
120  if not "." in requestId:
121  requestId += ".%s"%GenericValidation.defaultReferenceName
122  if not requestId.split(".")[-1] in result:
123  msg = ("could not find %s in reference Objects!"
124  %requestId.split(".")[-1])
125  raise AllInOneError(msg)
126  return result[ requestId.split(".")[-1] ]
127 
128  def createFiles(self, fileContents, path, repMap = None, repMaps = None):
129  """repMap: single map for all files
130  repMaps: a dict, with the filenames as the keys"""
131  if repMap is not None and repMaps is not None:
132  raise AllInOneError("createFiles can only take repMap or repMaps (or neither), not both")
133  result = []
134  for fileName in fileContents:
135  filePath = os.path.join(path, fileName)
136  result.append(filePath)
137 
138  for (i, filePathi) in enumerate(addIndex(filePath, self.NJobs)):
139  theFile = open( filePathi, "w" )
140  fileContentsi = fileContents[ fileName ]
141  if repMaps is not None:
142  repMap = repMaps[fileName]
143  if repMap is not None:
144  repMap.update({"nIndex": str(i)})
145  fileContentsi = replaceByMap(fileContentsi, repMap)
146  theFile.write( fileContentsi )
147  theFile.close()
148 
149  return result
150 
151  def createConfiguration(self, fileContents, path, schedule = None, repMap = None, repMaps = None):
152  self.configFiles = GenericValidation.createFiles(self, fileContents,
153  path, repMap = repMap, repMaps = repMaps)
154  if not schedule == None:
155  schedule = [os.path.join( path, cfgName) for cfgName in schedule]
156  for cfgName in schedule:
157  if not cfgName in self.configFiles:
158  msg = ("scheduled %s missing in generated configfiles: %s"
159  %(cfgName, self.configFiles))
160  raise AllInOneError(msg)
161  for cfgName in self.configFiles:
162  if not cfgName in schedule:
163  msg = ("generated configuration %s not scheduled: %s"
164  %(cfgName, schedule))
165  raise AllInOneError(msg)
166  self.configFiles = schedule
167  return self.configFiles
168 
169  def createScript(self, fileContents, path, downloadFiles=[], repMap = None, repMaps = None):
170  self.scriptFiles = GenericValidation.createFiles(self, fileContents,
171  path, repMap = repMap, repMaps = repMaps)
172  for script in self.scriptFiles:
173  for scriptwithindex in addIndex(script, self.NJobs):
174  os.chmod(scriptwithindex,0755)
175  return self.scriptFiles
176 
177  def createCrabCfg(self, fileContents, path ):
178  if self.NJobs > 1:
179  msg = ("jobmode 'crab' not supported for parallel validation."
180  " Please set parallelJobs = 1.")
181  raise AllInOneError(msg)
182  self.crabConfigFiles = GenericValidation.createFiles(self, fileContents,
183  path)
184  return self.crabConfigFiles
185 
186 
188  """
189  Subclass of `GenericValidation` which is the base for validations using
190  datasets.
191  """
192 
193  def __init__(self, valName, alignment, config, valType,
194  addDefaults = {}, addMandatories=[]):
195  """
196  This method adds additional items to the `self.general` dictionary
197  which are only needed for validations using datasets.
198 
199  Arguments:
200  - `valName`: String which identifies individual validation instances
201  - `alignment`: `Alignment` instance to validate
202  - `config`: `BetterConfigParser` instance which includes the
203  configuration of the validations
204  - `valType`: String which specifies the type of validation
205  - `addDefaults`: Dictionary which contains default values for individual
206  validations in addition to the general default values
207  - `addMandatories`: List which contains mandatory parameters for
208  individual validations in addition to the general
209  mandatory parameters
210  """
211 
212  defaults = {"runRange": "",
213  "firstRun": "",
214  "lastRun": "",
215  "begin": "",
216  "end": "",
217  "JSON": ""
218  }
219  defaults.update(addDefaults)
220  mandatories = [ "dataset", "maxevents" ]
221  mandatories += addMandatories
222  GenericValidation.__init__(self, valName, alignment, config, valType, defaults, mandatories)
223 
224  # if maxevents is not specified, cannot calculate number of events for
225  # each parallel job, and therefore running only a single job
226  if int( self.general["maxevents"] ) == -1 and self.NJobs > 1:
227  msg = ("Maximum number of events (maxevents) not specified: "
228  "cannot use parallel jobs.")
229  raise AllInOneError(msg)
230 
231  tryPredefinedFirst = (not self.jobmode.split( ',' )[0] == "crab" and self.general["JSON"] == ""
232  and self.general["firstRun"] == "" and self.general["lastRun"] == ""
233  and self.general["begin"] == "" and self.general["end"] == "")
234 
235  if self.general["dataset"] not in globalDictionaries.usedDatasets:
236  globalDictionaries.usedDatasets[self.general["dataset"]] = {}
237 
238  if self.cmssw not in globalDictionaries.usedDatasets[self.general["dataset"]]:
239  if globalDictionaries.usedDatasets[self.general["dataset"]] != {}:
240  print ("Warning: you use the same dataset '%s' in multiple cmssw releases.\n"
241  "This is allowed, but make sure it's not a mistake") % self.general["dataset"]
242  globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw] = {False: None, True: None}
243 
244  if globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] is None:
245  dataset = Dataset(
246  self.general["dataset"], tryPredefinedFirst = tryPredefinedFirst,
247  cmssw = self.cmssw, cmsswrelease = self.cmsswreleasebase )
248  globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] = dataset
249  if tryPredefinedFirst and not dataset.predefined(): #No point finding the data twice in that case
250  globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][False] = dataset
251 
252  self.dataset = globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst]
253  self.general["magneticField"] = self.dataset.magneticField()
254  self.general["defaultMagneticField"] = "MagneticField"
255  if self.general["magneticField"] == "unknown":
256  print "Could not get the magnetic field for this dataset."
257  print "Using the default: ", self.general["defaultMagneticField"]
258  self.general["magneticField"] = '.oO[defaultMagneticField]Oo.'
259 
260  if not self.jobmode.split( ',' )[0] == "crab":
261  try:
262  self.general["datasetDefinition"] = self.dataset.datasetSnippet(
263  jsonPath = self.general["JSON"],
264  firstRun = self.general["firstRun"],
265  lastRun = self.general["lastRun"],
266  begin = self.general["begin"],
267  end = self.general["end"],
268  parent = self.needParentFiles )
269  except AllInOneError, e:
270  msg = "In section [%s:%s]: "%(valType, self.name)
271  msg += str(e)
272  raise AllInOneError(msg)
273  else:
274  if self.dataset.predefined():
275  msg = ("For jobmode 'crab' you cannot use predefined datasets "
276  "(in your case: '%s')."%( self.dataset.name() ))
277  raise AllInOneError( msg )
278  try:
279  theUpdate = config.getResultingSection(valType+":"+self.name,
280  demandPars = ["parallelJobs"])
281  except AllInOneError, e:
282  msg = str(e)[:-1]+" when using 'jobmode: crab'."
283  raise AllInOneError(msg)
284  self.general.update(theUpdate)
285  if self.general["begin"] or self.general["end"]:
286  ( self.general["begin"],
287  self.general["end"],
288  self.general["firstRun"],
289  self.general["lastRun"] ) = self.dataset.convertTimeToRun(
290  firstRun = self.general["firstRun"],
291  lastRun = self.general["lastRun"],
292  begin = self.general["begin"],
293  end = self.general["end"],
294  shortTuple = False)
295  if self.general["begin"] == None:
296  self.general["begin"] = ""
297  if self.general["end"] == None:
298  self.general["end"] = ""
299  self.general["firstRun"] = str( self.general["firstRun"] )
300  self.general["lastRun"] = str( self.general["lastRun"] )
301  if ( not self.general["firstRun"] ) and \
302  ( self.general["end"] or self.general["lastRun"] ):
303  self.general["firstRun"] = str(
304  self.dataset.runList()[0]["run_number"])
305  if ( not self.general["lastRun"] ) and \
306  ( self.general["begin"] or self.general["firstRun"] ):
307  self.general["lastRun"] = str(
308  self.dataset.runList()[-1]["run_number"])
309  if self.general["firstRun"] and self.general["lastRun"]:
310  if int(self.general["firstRun"]) > int(self.general["lastRun"]):
311  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
312  "chosen is greater than the upper time/runrange limit "
313  "('end'/'lastRun').")
314  raise AllInOneError( msg )
315  self.general["runRange"] = (self.general["firstRun"]
316  + '-' + self.general["lastRun"])
317  try:
318  self.general["datasetDefinition"] = self.dataset.datasetSnippet(
319  jsonPath = self.general["JSON"],
320  firstRun = self.general["firstRun"],
321  lastRun = self.general["lastRun"],
322  begin = self.general["begin"],
323  end = self.general["end"],
324  crab = True )
325  except AllInOneError, e:
326  msg = "In section [%s:%s]: "%(valType, self.name)
327  msg += str( e )
328  raise AllInOneError( msg )
329 
330  def getRepMap(self, alignment = None):
331  result = GenericValidation.getRepMap(self, alignment)
332  outputfile = os.path.expandvars(replaceByMap(
333  "%s_%s_.oO[name]Oo..root" % (self.outputBaseName, self.name)
334  , result))
335  resultfile = os.path.expandvars(replaceByMap(("/store/caf/user/$USER/.oO[eosdir]Oo./" +
336  "%s_%s_.oO[name]Oo..root" % (self.resultBaseName, self.name))
337  , result))
338  result.update({
339  "resultFile": ".oO[resultFiles[.oO[nIndex]Oo.]]Oo.",
340  "resultFiles": addIndex(resultfile, self.NJobs),
341  "finalResultFile": resultfile,
342  "outputFile": ".oO[outputFiles[.oO[nIndex]Oo.]]Oo.",
343  "outputFiles": addIndex(outputfile, self.NJobs),
344  "finalOutputFile": outputfile
345  })
346  return result
347 
348  def createScript(self, path, template = configTemplates.scriptTemplate, downloadFiles=[], repMap = None, repMaps = None):
349  scriptName = "%s.%s.%s.sh"%(self.scriptBaseName, self.name,
350  self.alignmentToValidate.name )
351  if repMap is None and repMaps is None:
352  repMap = self.getRepMap()
353  repMap["CommandLine"]=""
354  for cfg in self.configFiles:
355  repMap["CommandLine"]+= repMap["CommandLineTemplate"]%{"cfgFile":addIndex(cfg, self.NJobs, ".oO[nIndex]Oo."),
356  "postProcess":""
357  }
358  scripts = {scriptName: template}
359  return GenericValidation.createScript(self, scripts, path, downloadFiles = downloadFiles,
360  repMap = repMap, repMaps = repMaps)
361 
362  def createCrabCfg(self, path, crabCfgBaseName):
363  """
364  Method which creates a `crab.cfg` for a validation on datasets.
365 
366  Arguments:
367  - `path`: Path at which the file will be stored.
368  - `crabCfgBaseName`: String which depends on the actual type of
369  validation calling this method.
370  """
371  crabCfgName = "crab.%s.%s.%s.cfg"%( crabCfgBaseName, self.name,
372  self.alignmentToValidate.name )
373  repMap = self.getRepMap()
374  repMap["script"] = "dummy_script.sh"
375  # repMap["crabOutputDir"] = os.path.basename( path )
376  repMap["crabWorkingDir"] = crabCfgName.split( '.cfg' )[0]
377  self.crabWorkingDir = repMap["crabWorkingDir"]
378  repMap["numberOfJobs"] = self.general["parallelJobs"]
379  repMap["cfgFile"] = self.configFiles[0]
380  repMap["queue"] = self.jobmode.split( ',' )[1].split( '-q' )[1]
381  if self.dataset.dataType() == "mc":
382  repMap["McOrData"] = "events = .oO[nEvents]Oo."
383  elif self.dataset.dataType() == "data":
384  repMap["McOrData"] = "lumis = -1"
385  if self.jobmode.split( ',' )[0] == "crab":
386  print ("For jobmode 'crab' the parameter 'maxevents' will be "
387  "ignored and all events will be processed.")
388  else:
389  raise AllInOneError("Unknown data type! Can't run in crab mode")
390  crabCfg = {crabCfgName: replaceByMap( configTemplates.crabCfgTemplate,
391  repMap ) }
392  return GenericValidation.createCrabCfg( self, crabCfg, path )
def replaceByMap
— Helpers —############################
double split
Definition: MVATrainer.cc:139