3 from __future__
import print_function
13 """Base class of the LHE merge schemes""" 20 """Output the merged LHE""" 23 class DefaultLHEMerger(BaseLHEMerger):
24 """Default LHE merge scheme that copies the header of the first LHE file, 25 merges and outputs the init block, then concatenates all event blocks.""" 27 def __init__(self, input_files, output_file, **kwargs):
28 super(DefaultLHEMerger, self).
__init__(input_files, output_file)
41 """Line-by-line iterator of a txt file""" 42 with open(path,
'r') as f: 47 """Check if all headers for input files are consistent.""" 52 inconsistent_error_info = (
"Incompatibility found in LHE headers: %s. " 53 "Use -b/--bypass-check to bypass the check.")
55 'nevent',
'numevts',
'iseed',
'Seed',
'Random',
'.log',
'.dat',
'.lhe',
56 'Number of Events',
'Integrated weight' 61 logging.debug(
'header line number: %s' \
65 ), inconsistent_error_info %
"line number not matches" 68 if any([k
in line_zip[0]
for k
in allow_diff_keys]):
69 logging.debug(
'Captured \'%s\', we allow difference in this line' % line_zip[0])
71 if not all([line_zip[0] == line
for line
in line_zip]):
73 for i, line
in enumerate(line_zip):
74 inconsistent_lines_set[i].
add(line)
76 assert all([inconsistent_lines_set[0] == lset
for lset
in inconsistent_lines_set]), \
77 inconsistent_error_info % (
'{' +
', '.
join(inconsistent_lines_set[0]) +
'}')
80 """Merge the headers of input LHEs. Need special handle for the MG5 LO case.""" 87 (
r"<MGGenerationInfo>\s+#\s*Number of Events\s*\:\s*(\S+)\s+" 88 r"#\s*Integrated weight \(pb\)\s*\:\s*(\S+)\s+<\/MGGenerationInfo>"),
93 [
float(info.group(2)) * nevt
for info, nevt
in zip(match_geninfo, self.
_nevent)]
95 geninfo_combined = (
"<MGGenerationInfo>\n" 96 "# Number of Events : %d\n" 97 "# Integrated weight (pb) : %.10f\n</MGGenerationInfo>") \
99 logging.info(
'Detected: MG5 LO LHEs. Input <MGGenerationInfo>:\n\tnevt\txsec')
100 for info, nevt
in zip(match_geninfo, self.
_nevent):
101 logging.info(
'\t%d\t%.10f' % (nevt,
float(info.group(2))))
102 logging.info(
'Combined <MGGenerationInfo>:\n\t%d\t%.10f' \
106 return header_combined
113 """If all <init> blocks are identical, return the same <init> block 114 (in the case of Powheg LHEs); otherwise, calculate the output <init> 115 blocks by merging the input blocks info using formula (same with the 116 MG5LOLHEMerger scheme): 117 XSECUP = sum(xsecup * no.events) / tot.events 118 XERRUP = sqrt( sum(sigma^2 * no.events^2) ) / tot.events 130 old_init_block = [{}
for _
in self.
_init_str]
138 for bl_line
in bl.split(
'\n')[1:nline + 1]:
139 bl_line_sp = bl_line.split()
140 old_init_block[i][
int(bl_line_sp[3])] = [
146 info_after_subprocess = bl.strip().
split(
'\n')[nline + 1:]
148 logging.info(
'Input file: %s' % self.
input_files[i])
149 for ipr
in sorted(list(old_init_block[i].
keys()), reverse=
True):
151 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
152 %
tuple(old_init_block[i][ipr] + [ipr]))
157 if all([old_init_block[i] == old_init_block[0]
for i
in range(len(self.
_f))]):
160 'All input <init> blocks are identical. Output the same "<init> block.')
165 for ipr
in old_init_block[i]:
168 if ipr
not in new_init_block:
169 new_init_block[ipr] = [0., 0., 0.]
170 new_init_block[ipr][0] += old_init_block[i][ipr][0] * self.
_nevent[i]
171 new_init_block[ipr][1] += old_init_block[i][ipr][1]**2 * self.
_nevent[i]**2
172 new_init_block[ipr][2] =
max(new_init_block[ipr][2], old_init_block[i][ipr][2])
177 +
' ' +
str(len(new_init_block)) +
'\n' 180 for ipr
in sorted(list(new_init_block.keys()), reverse=
True):
182 new_init_block[ipr][0] /= tot_nevent
183 new_init_block[ipr][1] = math.sqrt(new_init_block[ipr][1]) / tot_nevent
184 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
185 %
tuple(new_init_block[ipr] + [ipr]))
188 if len(info_after_subprocess):
199 while not re.search(
'\s*<init(>|\s)', line):
210 while not re.search(
'\s*</init>', line):
218 with open(
'.tmp.lhe',
'w')
as _fwtmp:
223 if re.search(
'\s*</event>', line):
225 if re.search(
'\s*</LesHouchesEvents>', line):
239 event_norm = re.search(
240 r'\s(\w+)\s*=\s*event_norm\s',
242 if event_norm ==
'sum':
244 elif event_norm ==
'average':
246 logging.info((
"MG5 LO LHE with event_norm = %s detected. Will " 247 "recalculate weights in each event block.\n" 248 "Unit weight: %+.7E") % (event_norm, self.
_uwgt))
252 with open(
'.tmp.lhe',
'r') as ftmp: 253 sign = lambda x: -1
if x < 0
else 1
256 if re.search(
'\s*<event.*>', line):
261 orig_wgt =
float(line.split()[2])
262 fw.write(re.sub(
r'(^\s*\S+\s+\S+\s+)\S+(.+)',
r'\g<1>%+.7E\g<2>' \
264 elif re.search(
'\s*<wgt.*>.*</wgt>', line):
265 addi_wgt_str = re.search(
r'<wgt.*>\s*(\S+)\s*<\/wgt>', line).
group(1)
266 fw.write(line.replace(
267 addi_wgt_str,
'%+.7E' % (
float(addi_wgt_str) / orig_wgt * self.
_uwgt)))
272 with open(
'.tmp.lhe',
'r') as ftmp: 275 fw.write(
'</LesHouchesEvents>\n')
276 os.remove(
'.tmp.lhe')
280 """Use the merger script dedicated for MG5 LO LHEs, as introduced in 281 https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl 284 def __init__(self, input_files, output_file, **kwargs):
285 super(MG5LOLHEMerger, self).
__init__(input_files, output_file)
287 'https://raw.githubusercontent.com/cms-sw/genproductions/5c1e865a6fbe3a762a28363835d9a804c9cf0dbe/bin/MadGraph5_aMCatNLO/Utilities/merge.pl' 291 (
'Use the merger script in genproductions dedicated for ' 292 'MadGraph5-produced LHEs'))
293 os.system(
'curl -s -L %s | perl - %s %s.gz banner.txt' \
296 os.system(
'rm banner.txt')
300 """Use the external mergeLheFiles.cpp file to merge LHE files, as introduced in 301 https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns 304 def __init__(self, input_files, output_file, **kwargs):
305 super(ExternalCppLHEMerger, self).
__init__(input_files, output_file)
307 'https://twiki.cern.ch/twiki/bin/viewfile/CMSPublic/SWGuideSubgroupMC?filename=mergeLheFiles.cpp;rev=2' 311 (
'Use the external mergeLheFiles.cpp file to merge LHE files.'))
313 with open(
'mergeLheFiles.cpp')
as f:
314 script_str = f.read()
315 with open(
'mergeLheFiles.cpp',
'w')
as fw:
316 fw.write(script_str.replace(
'/tmp/covarell/out.lhe', self.
output_file))
317 with open(
'input_files.txt',
'w')
as fw:
320 os.system(
'g++ -Wall -o mergeLheFiles mergeLheFiles.cpp')
321 os.system(
'./mergeLheFiles input_files.txt')
322 os.system(
'rm mergeLheFiles* input_files.txt')
326 """Main routine of the script. 329 - `argv`: arguments passed to the main routine 335 parser = argparse.ArgumentParser(
336 description=(
"A universal script that merges multiple LHE files for all possible conditions and in the most " 338 "A detailed description of the merging step (in the default mode):\n" 340 " a. assert consistency of the headers (allow difference for the info of e.g. #event, seed);\n" 341 " b. if not MG LO LHEs, will simply use the header from the first LHE; otherwise, reset the " 342 "<MGGenerationInfo> from the headers by merging the #event & xsec info;\n" 343 " 2. Init block: if all <init> blocks are the same, use the same as output; otherwise (the MG LO " 344 "case), merge them by recalculating the # of subprocess (LRPUP) and XSECUP, XERRUP, XMAXUP per " 346 " 3. Event block: concatenate all event blocks. If for MG LO LHEs, recalculate the per-event " 347 "XWGTUP and all <wgt> tags based on the new XSECUP, #event, and 'event_norm' read from the MG " 349 "For further development of this script please always validate the merging result on the test " 350 "routines: https://github.com/colizz/mergelhe_validate\n" 352 " mergeLHE.py -i 'thread*/*.lhe,another_file/another.lhe' -o output.lhe"),
353 formatter_class=argparse.RawTextHelpFormatter)
354 parser.add_argument(
"-i",
"--input-files", type=str,
355 help=
"Input LHE file paths separated by commas. Shell-type wildcards are supported.")
356 parser.add_argument(
"-o",
"--output-file",
357 default=
'output.lhe', type=str,
358 help=
"Output LHE file path.")
359 parser.add_argument(
"--force-mglo-merger", action=
'store_true',
360 help=(
"Force to use the merger script dedicated for MG5 LO LHEs, as introduced in " 361 "https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl"))
362 parser.add_argument(
"--force-cpp-merger", action=
'store_true',
363 help=(
"Force to use the external mergeLheFiles.cpp file to merge LHE files, as introduced in " 364 "https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns"))
365 parser.add_argument(
"-b",
"--bypass-check", action=
'store_true',
366 help=(
"Bypass the compatibility check for the headers. If true, the header and init block " 367 "will be just a duplicate from the first input file, and events are concatenated without " 369 parser.add_argument(
"--debug", action=
'store_true',
370 help=
"Use the debug mode.")
371 args = parser.parse_args(argv)
374 format=
'[%(levelname)s] %(message)s',
375 level=logging.INFO
if not args.debug
else DEBUG)
376 logging.info(
'>>> launch mergeLHE.py in %s' % os.path.abspath(os.getcwd()))
# Truthiness rather than len(): args.input_files is None when -i/--input-files
# is omitted, and len(None) would raise TypeError before this message is shown.
assert args.input_files, \
    ('Please specify your input LHE files by -i/--input-files. '
     'Run \'mergeLHE.py -h\' for details.')
383 for path
in args.input_files.split(
','):
384 find_files = glob.glob(path)
385 if len(find_files) == 0:
386 logging.info(
'Warning: cannot find files in %s' % path)
387 input_files += find_files
389 logging.info(
'>>> Merge %d files: [%s]' % (len(input_files),
', '.
join(input_files)))
390 logging.info(
'>>> Write to output: %s ' % args.output_file)
392 if not os.path.exists(os.path.dirname(os.path.realpath(args.output_file))):
393 os.makedirs(os.path.dirname(os.path.realpath(args.output_file)))
396 assert len(input_files) > 0,
'Input LHE files should be more than 0.' 397 if len(input_files) == 1:
398 logging.warning(
'Input LHE only has 1 file. Will copy this file to the destination.')
400 shutil.copy(input_files[0], args.output_file)
402 assert [args.force_mglo_merger, args.force_cpp_merger].
count(
True) <= 1, \
403 "Can only specify at most one from --force-mglo-merger or --force-cpp-merger." 406 if args.force_mglo_merger:
408 elif args.force_cpp_merger:
411 lhe_merger =
DefaultLHEMerger(input_files, args.output_file, bypass_check=args.bypass_check)
417 if __name__==
"__main__":
def __init__(self, input_files, output_file)
def merge_init_blocks(self)
bool any(const std::vector< T > &v, const T &what)
def check_header_compatibility(self)
def replace(string, replacements)
def __init__(self, input_files, output_file, kwargs)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def split(sequence, size)
def __init__(self, input_files, output_file, kwargs)
static std::string join(char **cmd)
def __init__(self, input_files, output_file, kwargs)
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def file_iterator(self, path)