CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
genericValidation.py
Go to the documentation of this file.
1 import os
2 import re
3 import json
4 import globalDictionaries
5 import configTemplates
6 from dataset import Dataset
7 from helperFunctions import replaceByMap, addIndex, getCommandOutput2
8 from TkAlExceptions import AllInOneError
9 
10 
12  defaultReferenceName = "DEFAULT"
13  def __init__(self, valName, alignment, config, valType,
14  addDefaults = {}, addMandatories=[]):
15  import random
16  self.name = valName
17  self.alignmentToValidate = alignment
18  self.general = config.getGeneral()
19  self.randomWorkdirPart = "%0i"%random.randint(1,10e9)
20  self.configFiles = []
21  self.filesToCompare = {}
22  self.config = config
23 
24  defaults = {"jobmode": self.general["jobmode"],
25  "cmssw": os.environ['CMSSW_BASE'],
26  "parallelJobs": "1"
27  }
28  defaults.update(addDefaults)
29  mandatories = []
30  mandatories += addMandatories
31  theUpdate = config.getResultingSection(valType+":"+self.name,
32  defaultDict = defaults,
33  demandPars = mandatories)
34  self.general.update(theUpdate)
35  self.jobmode = self.general["jobmode"]
36  self.NJobs = int(self.general["parallelJobs"])
37 
38  # limit maximum number of parallel jobs to 40
39  # (each output file is approximately 20MB)
40  maximumNumberJobs = 40
41  if self.NJobs > maximumNumberJobs:
42  msg = ("Maximum allowed number of parallel jobs "
43  +str(maximumNumberJobs)+" exceeded!!!")
44  raise AllInOneError(msg)
45 
46  self.cmssw = self.general["cmssw"]
47  badcharacters = r"\'"
48  for character in badcharacters:
49  if character in self.cmssw:
50  raise AllInOneError("The bad characters " + badcharacters + " are not allowed in the cmssw\n"
51  "path name. If you really have it in such a ridiculously named location,\n"
52  "try making a symbolic link somewhere with a decent name.")
53  try:
54  os.listdir(self.cmssw)
55  except OSError:
56  raise AllInOneError("Your cmssw release " + self.cmssw + ' does not exist')
57 
58  if self.cmssw == os.environ["CMSSW_BASE"]:
59  self.scramarch = os.environ["SCRAM_ARCH"]
60  self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
61  else:
62  command = ("cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
63  ' && echo "$CMSSW_BASE\n$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
64  commandoutput = getCommandOutput2(command).split('\n')
65  self.cmssw = commandoutput[0]
66  self.scramarch = commandoutput[1]
67  self.cmsswreleasebase = commandoutput[2]
68 
69  self.AutoAlternates = True
70  if config.has_option("alternateTemplates","AutoAlternates"):
71  try:
72  self.AutoAlternates = json.loads(config.get("alternateTemplates","AutoAlternates").lower())
73  except ValueError:
74  raise AllInOneError("AutoAlternates needs to be true or false, not %s" % config.get("alternateTemplates","AutoAlternates"))
75 
76  knownOpts = defaults.keys()+mandatories
77  ignoreOpts = []
78  config.checkInput(valType+":"+self.name,
79  knownSimpleOptions = knownOpts,
80  ignoreOptions = ignoreOpts)
81 
82  def getRepMap(self, alignment = None):
83  if alignment == None:
84  alignment = self.alignmentToValidate
85  result = alignment.getRepMap()
86  result.update( self.general )
87  result.update({
88  "workdir": os.path.join(self.general["workdir"],
89  self.randomWorkdirPart),
90  "datadir": self.general["datadir"],
91  "logdir": self.general["logdir"],
92  "CommandLineTemplate": ("#run configfile and post-proccess it\n"
93  "cmsRun %(cfgFile)s\n"
94  "%(postProcess)s "),
95  "CMSSW_BASE": self.cmssw,
96  "SCRAM_ARCH": self.scramarch,
97  "CMSSW_RELEASE_BASE": self.cmsswreleasebase,
98  "alignmentName": alignment.name,
99  "condLoad": alignment.getConditions(),
100  })
101  return result
102 
103  def getCompareStrings( self, requestId = None, plain = False ):
104  result = {}
105  repMap = self.alignmentToValidate.getRepMap()
106  for validationId in self.filesToCompare:
107  repMap["file"] = self.filesToCompare[ validationId ]
108  if repMap["file"].startswith( "/castor/" ):
109  repMap["file"] = "rfio:%(file)s"%repMap
110  elif repMap["file"].startswith( "/store/" ):
111  repMap["file"] = "root://eoscms.cern.ch//eos/cms%(file)s"%repMap
112  if plain:
113  result[validationId]=repMap["file"]
114  else:
115  result[validationId]= "%(file)s=%(title)s|%(color)s|%(style)s"%repMap
116  if requestId == None:
117  return result
118  else:
119  if not "." in requestId:
120  requestId += ".%s"%GenericValidation.defaultReferenceName
121  if not requestId.split(".")[-1] in result:
122  msg = ("could not find %s in reference Objects!"
123  %requestId.split(".")[-1])
124  raise AllInOneError(msg)
125  return result[ requestId.split(".")[-1] ]
126 
127  def createFiles(self, fileContents, path, repMap = None, repMaps = None):
128  """repMap: single map for all files
129  repMaps: a dict, with the filenames as the keys"""
130  if repMap is not None and repMaps is not None:
131  raise AllInOneError("createFiles can only take repMap or repMaps (or neither), not both")
132  result = []
133  for fileName in fileContents:
134  filePath = os.path.join(path, fileName)
135  result.append(filePath)
136 
137  for (i, filePathi) in enumerate(addIndex(filePath, self.NJobs)):
138  theFile = open( filePathi, "w" )
139  fileContentsi = fileContents[ fileName ]
140  if repMaps is not None:
141  repMap = repMaps[fileName]
142  if repMap is not None:
143  repMap.update({"nIndex": str(i)})
144  fileContentsi = replaceByMap(fileContentsi, repMap)
145  theFile.write( fileContentsi )
146  theFile.close()
147 
148  return result
149 
150  def createConfiguration(self, fileContents, path, schedule = None, repMap = None, repMaps = None):
151  self.configFiles = GenericValidation.createFiles(self, fileContents,
152  path, repMap = repMap, repMaps = repMaps)
153  if not schedule == None:
154  schedule = [os.path.join( path, cfgName) for cfgName in schedule]
155  for cfgName in schedule:
156  if not cfgName in self.configFiles:
157  msg = ("scheduled %s missing in generated configfiles: %s"
158  %(cfgName, self.configFiles))
159  raise AllInOneError(msg)
160  for cfgName in self.configFiles:
161  if not cfgName in schedule:
162  msg = ("generated configuration %s not scheduled: %s"
163  %(cfgName, schedule))
164  raise AllInOneError(msg)
165  self.configFiles = schedule
166  return self.configFiles
167 
168  def createScript(self, fileContents, path, downloadFiles=[], repMap = None, repMaps = None):
169  self.scriptFiles = GenericValidation.createFiles(self, fileContents,
170  path, repMap = repMap, repMaps = repMaps)
171  for script in self.scriptFiles:
172  for scriptwithindex in addIndex(script, self.NJobs):
173  os.chmod(scriptwithindex,0755)
174  return self.scriptFiles
175 
176  def createCrabCfg(self, fileContents, path ):
177  if self.NJobs > 1:
178  msg = ("jobmode 'crab' not supported for parallel validation."
179  " Please set parallelJobs = 1.")
180  raise AllInOneError(msg)
181  self.crabConfigFiles = GenericValidation.createFiles(self, fileContents,
182  path)
183  return self.crabConfigFiles
184 
185 
187  """
188  Subclass of `GenericValidation` which is the base for validations using
189  datasets.
190  """
191 
192  def __init__(self, valName, alignment, config, valType,
193  addDefaults = {}, addMandatories=[]):
194  """
195  This method adds additional items to the `self.general` dictionary
196  which are only needed for validations using datasets.
197 
198  Arguments:
199  - `valName`: String which identifies individual validation instances
200  - `alignment`: `Alignment` instance to validate
201  - `config`: `BetterConfigParser` instance which includes the
202  configuration of the validations
203  - `valType`: String which specifies the type of validation
204  - `addDefaults`: Dictionary which contains default values for individual
205  validations in addition to the general default values
206  - `addMandatories`: List which contains mandatory parameters for
207  individual validations in addition to the general
208  mandatory parameters
209  """
210 
211  defaults = {"runRange": "",
212  "firstRun": "",
213  "lastRun": "",
214  "begin": "",
215  "end": "",
216  "JSON": ""
217  }
218  defaults.update(addDefaults)
219  mandatories = [ "dataset", "maxevents" ]
220  mandatories += addMandatories
221  GenericValidation.__init__(self, valName, alignment, config, valType, defaults, mandatories)
222 
223  # if maxevents is not specified, cannot calculate number of events for
224  # each parallel job, and therefore running only a single job
225  if int( self.general["maxevents"] ) == -1 and self.NJobs > 1:
226  msg = ("Maximum number of events (maxevents) not specified: "
227  "cannot use parallel jobs.")
228  raise AllInOneError(msg)
229 
230  tryPredefinedFirst = (not self.jobmode.split( ',' )[0] == "crab" and self.general["JSON"] == ""
231  and self.general["firstRun"] == "" and self.general["lastRun"] == ""
232  and self.general["begin"] == "" and self.general["end"] == "")
233 
234  if self.general["dataset"] not in globalDictionaries.usedDatasets:
235  globalDictionaries.usedDatasets[self.general["dataset"]] = {}
236 
237  if self.cmssw not in globalDictionaries.usedDatasets[self.general["dataset"]]:
238  if globalDictionaries.usedDatasets[self.general["dataset"]] != {}:
239  print ("Warning: you use the same dataset '%s' in multiple cmssw releases.\n"
240  "This is allowed, but make sure it's not a mistake") % self.general["dataset"]
241  globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw] = {False: None, True: None}
242 
243  if globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] is None:
244  dataset = Dataset(
245  self.general["dataset"], tryPredefinedFirst = tryPredefinedFirst,
246  cmssw = self.cmssw, cmsswrelease = self.cmsswreleasebase )
247  globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] = dataset
248  if tryPredefinedFirst and not dataset.predefined(): #No point finding the data twice in that case
249  globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][False] = dataset
250 
251  self.dataset = globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst]
252  self.general["magneticField"] = self.dataset.magneticField()
253  self.general["defaultMagneticField"] = "MagneticField"
254  if self.general["magneticField"] == "unknown":
255  print "Could not get the magnetic field for this dataset."
256  print "Using the default: ", self.general["defaultMagneticField"]
257  self.general["magneticField"] = '.oO[defaultMagneticField]Oo.'
258 
259  if not self.jobmode.split( ',' )[0] == "crab":
260  try:
261  self.general["datasetDefinition"] = self.dataset.datasetSnippet(
262  jsonPath = self.general["JSON"],
263  firstRun = self.general["firstRun"],
264  lastRun = self.general["lastRun"],
265  begin = self.general["begin"],
266  end = self.general["end"],
267  parent = self.needParentFiles )
268  except AllInOneError, e:
269  msg = "In section [%s:%s]: "%(valType, self.name)
270  msg += str(e)
271  raise AllInOneError(msg)
272  else:
273  if self.dataset.predefined():
274  msg = ("For jobmode 'crab' you cannot use predefined datasets "
275  "(in your case: '%s')."%( self.dataset.name() ))
276  raise AllInOneError( msg )
277  try:
278  theUpdate = config.getResultingSection(valType+":"+self.name,
279  demandPars = ["parallelJobs"])
280  except AllInOneError, e:
281  msg = str(e)[:-1]+" when using 'jobmode: crab'."
282  raise AllInOneError(msg)
283  self.general.update(theUpdate)
284  if self.general["begin"] or self.general["end"]:
285  ( self.general["begin"],
286  self.general["end"],
287  self.general["firstRun"],
288  self.general["lastRun"] ) = self.dataset.convertTimeToRun(
289  firstRun = self.general["firstRun"],
290  lastRun = self.general["lastRun"],
291  begin = self.general["begin"],
292  end = self.general["end"],
293  shortTuple = False)
294  if self.general["begin"] == None:
295  self.general["begin"] = ""
296  if self.general["end"] == None:
297  self.general["end"] = ""
298  self.general["firstRun"] = str( self.general["firstRun"] )
299  self.general["lastRun"] = str( self.general["lastRun"] )
300  if ( not self.general["firstRun"] ) and \
301  ( self.general["end"] or self.general["lastRun"] ):
302  self.general["firstRun"] = str(
303  self.dataset.runList()[0]["run_number"])
304  if ( not self.general["lastRun"] ) and \
305  ( self.general["begin"] or self.general["firstRun"] ):
306  self.general["lastRun"] = str(
307  self.dataset.runList()[-1]["run_number"])
308  if self.general["firstRun"] and self.general["lastRun"]:
309  if int(self.general["firstRun"]) > int(self.general["lastRun"]):
310  msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
311  "chosen is greater than the upper time/runrange limit "
312  "('end'/'lastRun').")
313  raise AllInOneError( msg )
314  self.general["runRange"] = (self.general["firstRun"]
315  + '-' + self.general["lastRun"])
316  try:
317  self.general["datasetDefinition"] = self.dataset.datasetSnippet(
318  jsonPath = self.general["JSON"],
319  firstRun = self.general["firstRun"],
320  lastRun = self.general["lastRun"],
321  begin = self.general["begin"],
322  end = self.general["end"],
323  crab = True )
324  except AllInOneError, e:
325  msg = "In section [%s:%s]: "%(valType, self.name)
326  msg += str( e )
327  raise AllInOneError( msg )
328 
329  def getRepMap(self, alignment = None):
330  result = GenericValidation.getRepMap(self, alignment)
331  outputfile = os.path.expandvars(replaceByMap(
332  "%s_%s_.oO[name]Oo..root" % (self.outputBaseName, self.name)
333  , result))
334  resultfile = os.path.expandvars(replaceByMap(("/store/caf/user/$USER/.oO[eosdir]Oo./" +
335  "%s_%s_.oO[name]Oo..root" % (self.resultBaseName, self.name))
336  , result))
337  result.update({
338  "resultFile": ".oO[resultFiles[.oO[nIndex]Oo.]]Oo.",
339  "resultFiles": addIndex(resultfile, self.NJobs),
340  "finalResultFile": resultfile,
341  "outputFile": ".oO[outputFiles[.oO[nIndex]Oo.]]Oo.",
342  "outputFiles": addIndex(outputfile, self.NJobs),
343  "finalOutputFile": outputfile
344  })
345  return result
346 
347  def createScript(self, path, template = configTemplates.scriptTemplate, downloadFiles=[], repMap = None, repMaps = None):
348  scriptName = "%s.%s.%s.sh"%(self.scriptBaseName, self.name,
349  self.alignmentToValidate.name )
350  if repMap is None and repMaps is None:
351  repMap = self.getRepMap()
352  repMap["CommandLine"]=""
353  for cfg in self.configFiles:
354  repMap["CommandLine"]+= repMap["CommandLineTemplate"]%{"cfgFile":addIndex(cfg, self.NJobs, ".oO[nIndex]Oo."),
355  "postProcess":""
356  }
357  scripts = {scriptName: template}
358  return GenericValidation.createScript(self, scripts, path, downloadFiles = downloadFiles,
359  repMap = repMap, repMaps = repMaps)
360 
361  def createCrabCfg(self, path, crabCfgBaseName):
362  """
363  Method which creates a `crab.cfg` for a validation on datasets.
364 
365  Arguments:
366  - `path`: Path at which the file will be stored.
367  - `crabCfgBaseName`: String which depends on the actual type of
368  validation calling this method.
369  """
370  crabCfgName = "crab.%s.%s.%s.cfg"%( crabCfgBaseName, self.name,
371  self.alignmentToValidate.name )
372  repMap = self.getRepMap()
373  repMap["script"] = "dummy_script.sh"
374  # repMap["crabOutputDir"] = os.path.basename( path )
375  repMap["crabWorkingDir"] = crabCfgName.split( '.cfg' )[0]
376  self.crabWorkingDir = repMap["crabWorkingDir"]
377  repMap["numberOfJobs"] = self.general["parallelJobs"]
378  repMap["cfgFile"] = self.configFiles[0]
379  repMap["queue"] = self.jobmode.split( ',' )[1].split( '-q' )[1]
380  if self.dataset.dataType() == "mc":
381  repMap["McOrData"] = "events = .oO[nEvents]Oo."
382  elif self.dataset.dataType() == "data":
383  repMap["McOrData"] = "lumis = -1"
384  if self.jobmode.split( ',' )[0] == "crab":
385  print ("For jobmode 'crab' the parameter 'maxevents' will be "
386  "ignored and all events will be processed.")
387  else:
388  raise AllInOneError("Unknown data type! Can't run in crab mode")
389  crabCfg = {crabCfgName: replaceByMap( configTemplates.crabCfgTemplate,
390  repMap ) }
391  return GenericValidation.createCrabCfg( self, crabCfg, path )
def replaceByMap
— Helpers —############################
double split
Definition: MVATrainer.cc:139