10 from collections
import namedtuple
11 from collections
import defaultdict
12 from multiprocessing.pool
import ThreadPool
# Immutable record identifying one cmsDriver sequence configuration.
# Field order matters: instances are built positionally (sequence name,
# step, era, scenario, then the mc/data/fast flags) and are bound
# positionally to the SQL placeholders derived from `Sequence._fields`.
Sequence = namedtuple(
    "Sequence",
    ["seqname", "step", "era", "scenario", "mc", "data", "fast"],
)
# Input file handed to cmsDriver through its --filein option.
INFILE = "/store/data/Run2018A/EGamma/RAW/v1/000/315/489/00000/004D960A-EA4C-E811-A908-FA163ED1F481.root"

# Regular expression that is compiled and matched against the module labels
# extracted from the Tracer output of cmsRun.
# NOTE(review): matching labels are presumably excluded from the result --
# confirm against the code that consumes the match.
BLACKLIST = '^(TriggerResults|.*_step|DQMoutput|siPixelDigis)$'
33 @functools.lru_cache(maxsize=
None)
39 if seq.step !=
"HARVESTING":
42 otherstep =
"RAW2DIGI:siPixelDigis,"
44 wd = tempfile.mkdtemp()
47 with open(wd +
"/gdb",
"w"):
49 os.chmod(wd +
"/gdb", 0o700)
50 env = os.environ.copy()
51 env[
"PATH"] = wd +
":" + env[
"PATH"]
57 "--conditions",
"auto:run2_data",
58 "-s", otherstep+seq.step+sep+seq.seqname,
60 "--mc" if seq.mc
else "",
"--data" if seq.data
else "",
"--fast" if seq.fast
else "",
61 "--era" if seq.era
else "", seq.era,
62 "--eventcontent",
"DQM",
"--scenario" if seq.scenario
else "", seq.scenario,
63 "--datatier",
"DQMIO",
64 "--customise_commands",
'process.Tracer = cms.Service("Tracer")',
66 "--filein", INFILE,
"-n",
"0",
67 "--python_filename",
"cmssw_cfg.py",
"--no_exec"
70 driverargs = [x
for x
in driverargs
if x]
71 subprocess.check_call(driverargs, cwd=wd, stdout=2)
74 proc = subprocess.Popen([
"cmsRun",
"cmssw_cfg.py"], stderr=subprocess.STDOUT, stdout=subprocess.PIPE, cwd=wd, env=env)
75 tracedump, _ = proc.communicate()
78 if proc.returncode
and seq.step !=
"HARVESTING":
79 raise Exception(
"cmsRun failed for cmsDriver command %s" % driverargs)
81 lines = tracedump.splitlines()
82 labelre = re.compile(b
"[+]+ starting: constructing module with label '(\w+)'")
83 blacklistre = re.compile(BLACKLIST)
86 m = labelre.match(line)
88 label = m.group(1).
decode()
89 if blacklistre.match(label):
93 modules = set(modules)
96 configdump = subprocess.check_output([
"edmConfigDump",
"cmssw_cfg.py"], cwd=wd)
97 lines = configdump.splitlines()
98 modulere = re.compile(b
'process[.](.*) = cms.ED.*\("(.*)",')
106 modconfig[inconfig] += b
'\n' + line
111 m = modulere.match(line)
113 label = m.group(1).
decode()
114 plugin = m.group(2).
decode()
116 modclass[label] = plugin
117 modconfig[label] = line
121 plugininfo = tp.map(getplugininfo, modclass.values())
126 return modconfig, modclass, dict(plugininfo)
130 @functools.lru_cache(maxsize=
None)
132 plugindump = subprocess.check_output([
"edmPluginHelp",
"-p", pluginname])
133 line = plugindump.splitlines()[0].
decode()
135 pluginre = re.compile(
".* " + pluginname +
".*[(]((\w+)::)?(\w+)[)]")
136 m = pluginre.match(line)
139 return (pluginname, (
"",
""))
141 return (pluginname, (m.group(2), m.group(3)))
146 for label
in modclass.keys():
151 row.append(modclass[label])
153 row.append(
"::".
join(plugininfo[modclass[label]]))
155 row.append(modconfig[label].
decode())
156 out.append(tuple(row))
157 for row
in sorted(set(out)):
# Comma-separated column list for the `sequence` table, taken from the
# Sequence record so that the SQL statements stay in step with its fields.
SEQFIELDS = ",".join(Sequence._fields)
# One "?" parameter placeholder per Sequence field, in the same order, so a
# Sequence instance can be passed directly as the parameter tuple.
SEQPLACEHOLDER = ",".join("?" for _ in Sequence._fields)
165 CREATE TABLE IF NOT EXISTS plugin(classname, edmfamily, edmbase);
166 CREATE UNIQUE INDEX IF NOT EXISTS plugins ON plugin(classname);
167 CREATE TABLE IF NOT EXISTS module(id INTEGER PRIMARY KEY, classname, instancename, variation, config);
168 CREATE UNIQUE INDEX IF NOT EXISTS modules ON module(instancename, variation);
169 CREATE UNIQUE INDEX IF NOT EXISTS configs ON module(config);
170 CREATE TABLE IF NOT EXISTS sequence(id INTEGER PRIMARY KEY, {SEQFIELDS});
171 CREATE UNIQUE INDEX IF NOT EXISTS squences ON sequence({SEQFIELDS});
172 CREATE TABLE IF NOT EXISTS workflow(wfid, sequenceid);
173 CREATE UNIQUE INDEX IF NOT EXISTS wrokflows ON workflow(sequenceid, wfid);
174 CREATE TABLE IF NOT EXISTS sequencemodule(moduleid, sequenceid);
178 with sqlite3.connect(DBFILE)
as db:
180 cur.executescript(DBSCHEMA)
182 seqid =
list(cur.execute(f
"SELECT id FROM sequence WHERE ({SEQFIELDS}) = ({SEQPLACEHOLDER});", (seq)))
186 cur.execute(
"BEGIN;")
188 cur.execute(
"CREATE TEMP TABLE newmodules(instancename, classname, config);")
189 cur.executemany(
"INSERT INTO newmodules VALUES (?, ?, ?)", ((label, modclass[label], modconfig[label])
for label
in modconfig))
192 INSERT OR IGNORE INTO module(classname, instancename, variation, config)
193 SELECT classname, instancename,
194 (SELECT count(*) FROM module AS existing WHERE existing.instancename = newmodules.instancename),
195 config FROM newmodules;
199 cur.executemany(
"INSERT OR IGNORE INTO plugin VALUES (?, ?, ?);", ((plugin, edm[0], edm[1])
for plugin, edm
in plugininfo.items()))
201 cur.execute(f
"INSERT OR FAIL INTO sequence({SEQFIELDS}) VALUES({SEQPLACEHOLDER});", (seq))
202 seqid =
list(cur.execute(f
"SELECT id FROM sequence WHERE ({SEQFIELDS}) = ({SEQPLACEHOLDER});", (seq)))
204 cur.executemany(
"INSERT INTO sequencemodule SELECT id, ? FROM module WHERE config = ?;", ((seqid, modconfig[label])
for label
in modconfig))
205 cur.execute(
"COMMIT;")
208 with sqlite3.connect(DBFILE)
as db:
210 cur.execute(
"BEGIN;")
211 cur.executescript(DBSCHEMA)
212 pairs = [[wf] +
list(seq)
for wf, seqlist
in seqs.items()
for seq
in seqlist]
213 cur.executemany(f
"INSERT OR IGNORE INTO workflow SELECT ?, (SELECT id FROM sequence WHERE ({SEQFIELDS}) = ({SEQPLACEHOLDER}));", pairs)
214 cur.execute(
"COMMIT;")
222 sequences = defaultdict(list)
225 stepdump = subprocess.check_output([
"runTheMatrix.py",
"-l",
str(wfnumber),
"-ne"])
227 stepdump = subprocess.check_output([
"runTheMatrix.py",
"-ne"])
229 lines = stepdump.splitlines()
231 workflowre = re.compile(b
"^([0-9]+.[0-9]+) ")
234 m = workflowre.match(line)
236 workflow = m.group(1).
decode()
240 if not b
'cmsDriver.py' in line:
continue
242 args =
list(reversed(line.decode().
split(
" ")))
253 if item ==
'--scenario':
254 scenario = args.pop()
263 steps = step.split(
",")
265 s = step.split(
":")[0]
266 if s
in RELEVANTSTEPS:
269 seqs = step.split(
":")[1]
270 for seq
in seqs.split(
"+"):
271 sequences[workflow].
append(
Sequence(seq, s, era, scenario, mc, data, fast))
273 sequences[workflow].
append(
Sequence(
"", s, era, scenario, mc, data, fast))
278 tasks = [stp.map_async(
lambda seq: (seq,
inspectsequence(seq)), [seq])
for seq
in seqs]
292 if not t.successful():
293 print(
"Task failed.")
305 db = sqlite3.connect(DBFILE)
308 return (seq.step +
":" + seq.seqname +
" " + seq.era +
" " + seq.scenario
309 + (
" --mc" if seq.mc
else "") + (
" --data" if seq.data
else "")
310 + (
" --fast" if seq.fast
else ""))
315 out.append(
"<H2>Sequences</H2><ul>")
316 out.append(
"""<p> A sequence name, given as <em>STEP:@sequencename</em> here, does not uniquely identify a sequence.
317 The modules on the sequence might depend on other cmsDriver options, such as Era, Scenario, Data vs. MC, etc.
318 This tool lists parameter combinations that were observed. However, sequences with identical contents are grouped
319 on this page. The default sequence, used when no explicit sequence is passed to cmsDriver, is noted as <em>STEP:</em>.</p>""")
320 rows = cur.execute(f
"SELECT seqname, step, count(*) FROM sequence GROUP BY seqname, step ORDER BY seqname, step;")
322 seqname, step, count = row
324 out += showseq(step, seqname)
325 out.append(f
' </li>')
328 out.append(
"<H2>Modules</H2><ul>")
329 rows = cur.execute(f
"SELECT classname, edmfamily, edmbase FROM plugin ORDER BY edmfamily, edmbase, classname")
331 classname, edmfamily, edmbase = row
332 if not edmfamily: edmfamily =
"<em>legacy</em>"
333 out.append(f
' <li>{edmfamily}::{edmbase} <a href="/plugin/{classname}/">{classname}</a></li>')
337 def showseq(step, seqname):
341 out.append(f
' <a href="/seq/{step}:{seqname}/">{step}:{seqname}</a>')
345 rows = cur.execute(f
"SELECT {SEQFIELDS}, moduleid, id FROM sequence INNER JOIN sequencemodule ON sequenceid = id WHERE seqname = ? and step = ?;", (seqname, step))
347 seqs = defaultdict(list)
354 variations = defaultdict(list)
355 for seq, mods
in seqs.items():
356 variations[tuple(sorted(mods))].
append(seq)
359 for mods, seqs
in variations.items():
361 out.append(f
' <li>({count} modules):')
364 out.append(f
'<br><a href="/seqid/{seqid}">' + formatseq(seq) +
'</a>')
366 rows = cur.execute(
"SELECT wfid FROM workflow WHERE sequenceid = ?;", (seqid,))
367 out.append(f
'<em>Used on workflows: ' +
", ".
join(wfid
for wfid,
in rows) +
"</em>")
372 def showseqid(seqid):
377 rows = cur.execute(f
"SELECT {SEQFIELDS} FROM sequence WHERE id = ?;", (seqid,))
379 out.append(f
"<h2>Modules on {seq}:</h2><ul>")
380 rows = cur.execute(
"SELECT wfid FROM workflow WHERE sequenceid = ?;", (seqid,))
381 out.append(
"<p><em>Used on workflows: " +
", ".
join(wfid
for wfid,
in rows) +
"</em></p>")
382 rows = cur.execute(
"""
383 SELECT classname, instancename, variation, moduleid
384 FROM sequencemodule INNER JOIN module ON moduleid = module.id
385 WHERE sequenceid = ?;""", (seqid,))
387 classname, instancename, variation, moduleid = row
388 out.append(f
'<li>{instancename} ' + (f
'<sub>{variation}</sub>' if variation
else '') + f
' : <a href="/plugin/{classname}/">{classname}</a></li>')
393 def showclass(classname):
398 out.append(f
"<h2>Plugin {classname}</h2>")
401 rows = cur.execute(
"SELECT edmfamily, edmbase FROM plugin WHERE classname = ?;", (classname,))
402 edmfamily, edmbase =
list(rows)[0]
403 islegcay =
not edmfamily
404 if islegcay: edmfamily =
"<em>legacy</em>"
405 out.append(f
"<p>{classname} is a <b>{edmfamily}::{edmbase}</b>.</p>")
406 out.append(
"""<p>A module with a given label can have different configuration depending on options such as Era,
407 Scenario, Data vs. MC etc. If multiple configurations for the same name were found, they are listed separately
408 here and denoted using subscripts.</p>""")
409 if (edmbase !=
"EDProducer" and not (islegcay
and edmbase ==
"EDAnalyzer"))
or (islegcay
and edmbase ==
"EDProducer"):
410 out.append(f
"<p>This is not a DQM module.</p>")
413 rows = cur.execute(
"""
414 SELECT module.id, instancename, variation, sequenceid, step, seqname
415 FROM module INNER JOIN sequencemodule ON moduleid = module.id INNER JOIN sequence ON sequence.id == sequenceid
416 WHERE classname = ? ORDER BY instancename, variation, step, seqname;""", (classname,))
418 seqsformod = defaultdict(list)
421 id, instancename, variation, sequenceid, step, seqname = row
422 liformod[id] = f
'<a href="/config/{id}">{instancename}' + (f
"<sub>{variation}</sub>" if variation
else '') +
"</a>"
423 seqsformod[id].
append((sequenceid, f
"{step}:{seqname}"))
424 for id, li
in liformod.items():
425 out.append(
"<li>" + li +
' Used here: ' +
", ".
join(f
'<a href="/seqid/{seqid}">{name}</a>' for seqid, name
in seqsformod[id]) +
'.</li>')
429 def showconfig(modid):
434 rows = cur.execute(f
"SELECT config FROM module WHERE id = ?;", (modid,))
435 config =
list(rows)[0][0]
437 out.append(config.decode())
442 (re.compile(
'/$'), index),
443 (re.compile(
'/seq/(\w+):([@\w]*)/$'), showseq),
444 (re.compile(
'/seqid/(\d+)$'), showseqid),
445 (re.compile(
'/config/(\d+)$'), showconfig),
446 (re.compile(
'/plugin/(.*)/$'), showclass),
450 class Handler(http.server.SimpleHTTPRequestHandler):
454 for pattern, func
in ROUTES:
455 m = pattern.match(self.path)
461 self.send_response(200,
"Here you go")
462 self.send_header(
"Content-Type",
"text/html; charset=utf-8")
464 self.wfile.
write(b
"""<html><style>
469 self.wfile.
write(res)
470 self.wfile.
write(b
"</body></html>")
472 self.send_response(400,
"Something went wrong")
473 self.send_header(
"Content-Type",
"text/plain; charset=utf-8")
475 self.wfile.
write(b
"I don't understand this request.")
477 trace = traceback.format_exc()
478 self.send_response(500,
"Things went very wrong")
479 self.send_header(
"Content-Type",
"text/plain; charset=utf-8")
481 self.wfile.
write(trace.encode(
"utf8"))
483 server_address = (
'', 8000)
484 httpd = http.server.HTTPServer(server_address, Handler)
485 print(
"Serving at http://localhost:8000/ ...")
486 httpd.serve_forever()
489 if __name__ ==
"__main__":
# Command-line interface of the script.
parser = argparse.ArgumentParser(
    description='Collect information about DQM sequences.')

# Description of a single sequence to inspect; these values are used to build
# one Sequence record when no workflow/matrix mode is selected.
parser.add_argument("--sequence", default="",
                    help="Name of the sequence")
parser.add_argument("--step", default="DQM",
                    help="cmsDriver step that the sequence applies to")
parser.add_argument("--era", default="Run2_2018",
                    help="CMSSW Era to use")
parser.add_argument("--scenario", default="pp",
                    help="cmsDriver scenario")
parser.add_argument("--data", default=False, action="store_true",
                    help="Pass --data to cmsDriver.")
parser.add_argument("--mc", default=False, action="store_true",
                    help="Pass --mc to cmsDriver.")
parser.add_argument("--fast", default=False, action="store_true",
                    help="Pass --fast to cmsDriver.")

# Bulk modes that take their sequences from runTheMatrix.
parser.add_argument("--workflow", default=None,
                    help="Ignore other options and inspect this workflow instead (implies --sqlite).")
parser.add_argument("--runTheMatrix", default=False, action="store_true",
                    help="Ignore other options and inspect the full matrix instead (implies --sqlite).")
parser.add_argument("--steps", default="ALCA,DQM,HARVESTING,VALIDATION",
                    help="Which workflow steps to inspect from runTheMatrix.")

# Output destination and job-splitting controls.
parser.add_argument("--sqlite", default=False, action="store_true",
                    help="Write information to SQLite DB instead of stdout.")
parser.add_argument("--dbfile", default="sequences.db",
                    help="Name of the DB file to use.")
parser.add_argument("--threads", default=None, type=int,
                    help="Use a fixed number of threads (default is #cores).")
parser.add_argument("--limit", default=None, type=int,
                    help="Process only this many sequences.")
parser.add_argument("--offset", default=None, type=int,
                    help="Process sequences starting from this index. Used with --limit to divide the work into jobs.")

# Column selection for the plain-text (stdout) report.
parser.add_argument("--showpluginlabel", default=False, action="store_true",
                    help="Print the module label for each plugin (default).")
parser.add_argument("--showplugintype", default=False, action="store_true",
                    help="Print the base class for each plugin.")
parser.add_argument("--showpluginclass", default=False, action="store_true",
                    help="Print the class name for each plugin.")
parser.add_argument("--showpluginconfig", default=False, action="store_true",
                    help="Print the config dump for each plugin.")

# Web UI mode.
parser.add_argument("--serve", default=False, action="store_true",
                    help="Ignore other options and instead serve HTML UI from SQLite DB.")
514 args = parser.parse_args()
516 RELEVANTSTEPS += args.steps.split(
",")
520 tp = ThreadPool(args.threads)
521 stp = ThreadPool(args.threads)
525 elif args.workflow
or args.runTheMatrix:
528 seqset = set(sum(seqs.values(), []))
530 seqset =
list(sorted(seqset))[args.offset:]
532 seqset =
list(sorted(seqset))[:args.limit]
534 print(
"Analyzing %d seqs..." % len(seqset))
540 seq =
Sequence(args.sequence, args.step, args.era, args.scenario, args.mc, args.data, args.fast)
546 if not (args.showpluginlabel
or args.showpluginclass
or args.showplugintype
or args.showpluginconfig):
547 args.showpluginlabel =
True
548 formatsequenceinfo(modconfig, modclass, plugininfo, args.showpluginlabel, args.showpluginclass, args.showplugintype, args.showpluginconfig)