3 from __future__
import print_function
13 """Base class of the LHE merge schemes""" 20 """Output the merged LHE""" 23 class DefaultLHEMerger(BaseLHEMerger):
24 """Default LHE merge scheme that copies the header of the first LHE file, 25 merges and outputs the init block, then concatenates all event blocks.""" 27 def __init__(self, input_files, output_file, **kwargs):
28 super(DefaultLHEMerger, self).
__init__(input_files, output_file)
41 """Line-by-line iterator of a txt file""" 42 with open(path,
'r') as f: 47 """Check if all headers for input files are consistent.""" 52 inconsistent_error_info = (
"Incompatibility found in LHE headers: %s. " 53 "Use -b/--bypass-check to bypass the check.")
55 'nevent',
'numevts',
'iseed',
'Seed',
'Random',
'.log',
'.dat',
'.lhe',
56 'Number of Events',
'Integrated weight' 61 logging.debug(
'header line number: %s' \
65 ), inconsistent_error_info %
"line number not matches" 68 if any([k
in line_zip[0]
for k
in allow_diff_keys]):
69 logging.debug(
'Captured \'%s\', we allow difference in this line' % line_zip[0])
71 if not all([line_zip[0] == line
for line
in line_zip]):
73 for i, line
in enumerate(line_zip):
74 inconsistent_lines_set[i].
add(line)
76 assert all([inconsistent_lines_set[0] == lset
for lset
in inconsistent_lines_set]), \
77 inconsistent_error_info % (
'{' +
', '.
join(inconsistent_lines_set[0]) +
'}')
80 """Merge the headers of input LHEs. Need special handle for the MG5 LO case.""" 87 (
r"<MGGenerationInfo>\s+#\s*Number of Events\s*\:\s*(\S+)\s+" 88 r"#\s*Integrated weight \(pb\)\s*\:\s*(\S+)\s+<\/MGGenerationInfo>"),
93 [
float(info.group(2)) * nevt
for info, nevt
in zip(match_geninfo, self.
_nevent)]
95 geninfo_combined = (
"<MGGenerationInfo>\n" 96 "# Number of Events : %d\n" 97 "# Integrated weight (pb) : %.10f\n</MGGenerationInfo>") \
99 logging.info(
'Detected: MG5 LO LHEs. Input <MGGenerationInfo>:\n\tnevt\txsec')
100 for info, nevt
in zip(match_geninfo, self.
_nevent):
101 logging.info(
'\t%d\t%.10f' % (nevt,
float(info.group(2))))
102 logging.info(
'Combined <MGGenerationInfo>:\n\t%d\t%.10f' \
106 return header_combined
113 """If all <init> blocks are identical, return the same <init> block 114 (in the case of Powheg LHEs); otherwise, calculate the output <init> 115 blocks by merging the input blocks info using formula (same with the 116 MG5LOLHEMerger scheme): 117 XSECUP = sum(xsecup * no.events) / tot.events 118 XERRUP = sqrt( sum(sigma^2 * no.events^2) ) / tot.events 130 old_init_block = [{}
for _
in self.
_init_str]
138 for bl_line
in bl.split(
'\n')[1:nline + 1]:
139 bl_line_sp = bl_line.split()
140 old_init_block[i][
int(bl_line_sp[3])] = [
146 info_after_subprocess = bl.strip().
split(
'\n')[nline + 1:]
148 logging.info(
'Input file: %s' % self.
input_files[i])
149 for ipr
in sorted(
list(old_init_block[i].
keys()), reverse=
True):
151 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
152 % tuple(old_init_block[i][ipr] + [ipr]))
157 if all([old_init_block[i] == old_init_block[0]
for i
in range(len(self.
_f))]):
160 'All input <init> blocks are identical. Output the same "<init> block.')
164 for i
in range(len(self.
_f)):
165 for ipr
in old_init_block[i]:
168 if ipr
not in new_init_block:
169 new_init_block[ipr] = [0., 0., 0.]
170 new_init_block[ipr][0] += old_init_block[i][ipr][0] * self.
_nevent[i]
171 new_init_block[ipr][1] += old_init_block[i][ipr][1]**2 * self.
_nevent[i]**2
172 new_init_block[ipr][2] =
max(new_init_block[ipr][2], old_init_block[i][ipr][2])
173 tot_nevent = sum([self.
_nevent[i]
for i
in range(len(self.
_f))])
177 +
' ' +
str(len(new_init_block)) +
'\n' 180 for ipr
in sorted(
list(new_init_block.keys()), reverse=
True):
182 new_init_block[ipr][0] /= tot_nevent
183 new_init_block[ipr][1] = math.sqrt(new_init_block[ipr][1]) / tot_nevent
184 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
185 % tuple(new_init_block[ipr] + [ipr]))
186 self.
_merged_init_str +=
'%.6E %.6E %.6E %d\n' % tuple(new_init_block[ipr] + [ipr])
188 if len(info_after_subprocess):
196 for i
in range(len(self.
_f)):
199 while not re.search(
'\s*<init(>|\s)', line):
203 self._header_str.append(
''.
join(header))
207 for i
in range(len(self.
_f)):
210 while not re.search(
'\s*</init>', line):
214 self._init_str.append(
''.
join(init))
218 with open(
'.tmp.lhe',
'w')
as _fwtmp:
219 for i
in range(len(self.
_f)):
223 if re.search(
'\s*</event>', line):
225 if re.search(
'\s*</LesHouchesEvents>', line):
228 self._nevent.append(nevent)
239 event_norm = re.search(
240 r'\s(\w+)\s*=\s*event_norm\s',
242 if event_norm ==
'sum':
244 elif event_norm ==
'average':
246 logging.info((
"MG5 LO LHE with event_norm = %s detected. Will " 247 "recalculate weights in each event block.\n" 248 "Unit weight: %+.7E") % (event_norm, self.
_uwgt))
252 with open(
'.tmp.lhe',
'r') as ftmp: 253 sign = lambda x: -1
if x < 0
else 1
256 if re.search(
'\s*<event.*>', line):
261 orig_wgt =
float(line.split()[2])
262 fw.write(re.sub(
r'(^\s*\S+\s+\S+\s+)\S+(.+)',
r'\g<1>%+.7E\g<2>' \
264 elif re.search(
'\s*<wgt.*>.*</wgt>', line):
265 addi_wgt_str = re.search(
r'<wgt.*>\s*(\S+)\s*<\/wgt>', line).
group(1)
266 fw.write(line.replace(
267 addi_wgt_str,
'%+.7E' % (
float(addi_wgt_str) / orig_wgt * self.
_uwgt)))
272 with open(
'.tmp.lhe',
'r') as ftmp: 275 fw.write(
'</LesHouchesEvents>\n')
276 os.remove(
'.tmp.lhe')
280 """Use the merger script dedicated for MG5 LO LHEs, as introduced in 281 https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl 284 def __init__(self, input_files, output_file, **kwargs):
285 super(MG5LOLHEMerger, self).
__init__(input_files, output_file)
287 'https://raw.githubusercontent.com/cms-sw/genproductions/5c1e865a6fbe3a762a28363835d9a804c9cf0dbe/bin/MadGraph5_aMCatNLO/Utilities/merge.pl' 291 (
'Use the merger script in genproductions dedicated for ' 292 'MadGraph5-produced LHEs'))
293 os.system(
'curl -s -L %s | perl - %s %s.gz banner.txt' \
296 os.system(
'rm banner.txt')
300 """Use the external mergeLheFiles.cpp file to merge LHE files, as introduced in 301 https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns 304 def __init__(self, input_files, output_file, **kwargs):
305 super(ExternalCppLHEMerger, self).
__init__(input_files, output_file)
307 'https://twiki.cern.ch/twiki/bin/viewfile/CMSPublic/SWGuideSubgroupMC?filename=mergeLheFiles.cpp;rev=2' 311 (
'Use the external mergeLheFiles.cpp file to merge LHE files.'))
313 with open(
'mergeLheFiles.cpp')
as f:
314 script_str = f.read()
315 with open(
'mergeLheFiles.cpp',
'w')
as fw:
316 fw.write(script_str.replace(
'/tmp/covarell/out.lhe', self.
output_file))
317 with open(
'input_files.txt',
'w')
as fw:
320 os.system(
'g++ -Wall -o mergeLheFiles mergeLheFiles.cpp')
321 os.system(
'./mergeLheFiles input_files.txt')
322 os.system(
'rm mergeLheFiles* input_files.txt')
326 """Main routine of the script. 329 - `argv`: arguments passed to the main routine 335 parser = argparse.ArgumentParser(
336 description=(
"A universal script that merges multiple LHE files for all possible conditions and in the most " 338 "A detailed description of the merging step (in the default mode):\n" 340 " a. assert consistency of the headers (allow difference for the info of e.g. #event, seed);\n" 341 " b. if not MG LO LHEs, will simply use the header from the first LHE; otherwise, reset the " 342 "<MGGenerationInfo> from the headers by merging the #event & xsec info;\n" 343 " 2. Init block: if all <init> blocks are the same, use the same as output; otherwise (the MG LO " 344 "case), merge them by recalculating the # of subprocess (LRPUP) and XSECUP, XERRUP, XMAXUP per " 346 " 3. Event block: concatenate all event blocks. If for MG LO LHEs, recalculate the per-event " 347 "XWGTUP and all <wgt> tags based on the new XSECUP, #event, and 'event_norm' read from the MG " 349 "For further development of this script please always validate the merging result on the test " 350 "routines: https://github.com/colizz/mergelhe_validate\n" 352 " mergeLHE.py -i 'thread*/*.lhe,another_file/another.lhe' -o output.lhe"),
353 formatter_class=argparse.RawTextHelpFormatter)
354 parser.add_argument(
"-i",
"--input-files", type=str,
355 help=
"Input LHE file paths separated by commas. Shell-type wildcards are supported.")
356 parser.add_argument(
"-o",
"--output-file",
357 default=
'output.lhe', type=str,
358 help=
"Output LHE file path.")
359 parser.add_argument(
"--force-mglo-merger", action=
'store_true',
360 help=(
"Force to use the merger script dedicated for MG5 LO LHEs, as introduced in " 361 "https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl"))
362 parser.add_argument(
"--force-cpp-merger", action=
'store_true',
363 help=(
"Force to use the external mergeLheFiles.cpp file to merge LHE files, as introduced in " 364 "https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns"))
365 parser.add_argument(
"-b",
"--bypass-check", action=
'store_true',
366 help=(
"Bypass the compatibility check for the headers. If true, the header and init block " 367 "will be just a duplicate from the first input file, and events are concatenated without " 369 parser.add_argument(
"--debug", action=
'store_true',
370 help=
"Use the debug mode.")
371 args = parser.parse_args(argv)
374 format=
'[%(levelname)s] %(message)s',
375 level=logging.INFO
if not args.debug
else DEBUG)
376 logging.info(
'>>> launch mergeLHE.py in %s' % os.path.abspath(os.getcwd()))
379 assert len(args.input_files), \
380 (
'Please specify your input LHE files by -i/--input-files. ' 381 'Run \'mergeLHE.py -h\' for details.')
383 for path
in args.input_files.split(
','):
384 find_files = glob.glob(path)
385 if len(find_files) == 0:
386 logging.info(
'Warning: cannot find files in %s' % path)
387 input_files += find_files
389 logging.info(
'>>> Merge %d files: [%s]' % (len(input_files),
', '.
join(input_files)))
390 logging.info(
'>>> Write to output: %s ' % args.output_file)
392 if not os.path.exists(os.path.dirname(os.path.realpath(args.output_file))):
393 os.makedirs(os.path.dirname(os.path.realpath(args.output_file)))
396 assert len(input_files) > 0,
'Input LHE files should be more than 0.' 397 if len(input_files) == 1:
398 logging.warning(
'Input LHE only has 1 file. Will copy this file to the destination.')
400 shutil.copy(input_files[0], args.output_file)
402 assert [args.force_mglo_merger, args.force_cpp_merger].
count(
True) <= 1, \
403 "Can only specify at most one from --force-mglo-merger or --force-cpp-merger." 406 if args.force_mglo_merger:
408 elif args.force_cpp_merger:
411 lhe_merger =
DefaultLHEMerger(input_files, args.output_file, bypass_check=args.bypass_check)
417 if __name__==
"__main__":
def __init__(self, input_files, output_file)
def merge_init_blocks(self)
bool any(const std::vector< T > &v, const T &what)
def check_header_compatibility(self)
def replace(string, replacements)
def __init__(self, input_files, output_file, kwargs)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def __init__(self, input_files, output_file, kwargs)
void add(std::map< std::string, TH1 * > &h, TH1 *hist)
static std::string join(char **cmd)
def __init__(self, input_files, output_file, kwargs)
def file_iterator(self, path)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run