3 from __future__
import print_function
16 """Base class of the LHE merge schemes""" 23 """Output the merged LHE""" 26 class DefaultLHEMerger(BaseLHEMerger):
27 """Default LHE merge scheme that copies the header of the first LHE file, 28 merges and outputs the init block, then concatenates all event blocks.""" 30 def __init__(self, input_files, output_file, **kwargs):
31 super(DefaultLHEMerger, self).
__init__(input_files, output_file)
44 """Line-by-line iterator of a txt file""" 45 with open(path,
'r') as f: 50 """Check if all headers for input files are consistent.""" 55 inconsistent_error_info = (
"Incompatibility found in LHE headers: %s. " 56 "Use -b/--bypass-check to bypass the check.")
58 'nevent',
'numevts',
'iseed',
'Seed',
'Random',
'.log',
'.dat',
'.lhe',
59 'Number of Events',
'Integrated weight' 64 logging.debug(
'header line number: %s' \
68 ), inconsistent_error_info %
"line number does not match" 71 if any([k
in line_zip[0]
for k
in allow_diff_keys]):
72 logging.debug(
'Captured \'%s\', we allow difference in this line' % line_zip[0])
74 if not all([line_zip[0] == line
for line
in line_zip]):
76 for i, line
in enumerate(line_zip):
77 inconsistent_lines_set[i].
add(line)
79 assert all([inconsistent_lines_set[0] == lset
for lset
in inconsistent_lines_set]), \
80 inconsistent_error_info % (
'{' +
', '.
join(inconsistent_lines_set[0]) +
'}')
83 """Merge the headers of input LHEs. Need special handle for the MG5 LO case.""" 90 (
r"<MGGenerationInfo>\s+#\s*Number of Events\s*\:\s*(\S+)\s+" 91 r"#\s*Integrated weight \(pb\)\s*\:\s*(\S+)\s+<\/MGGenerationInfo>"),
96 [
float(info.group(2)) * nevt
for info, nevt
in zip(match_geninfo, self.
_nevent)]
98 geninfo_combined = (
"<MGGenerationInfo>\n" 99 "# Number of Events : %d\n" 100 "# Integrated weight (pb) : %.10f\n</MGGenerationInfo>") \
102 logging.info(
'Detected: MG5 LO LHEs. Input <MGGenerationInfo>:\n\tnevt\txsec')
103 for info, nevt
in zip(match_geninfo, self.
_nevent):
104 logging.info(
'\t%d\t%.10f' % (nevt,
float(info.group(2))))
105 logging.info(
'Combined <MGGenerationInfo>:\n\t%d\t%.10f' \
109 return header_combined
116 """If all <init> blocks are identical, return the same <init> block 117 (in the case of Powheg LHEs); otherwise, calculate the output <init> 118 blocks by merging the input blocks info using formula (same with the 119 MG5LOLHEMerger scheme): 120 XSECUP = sum(xsecup * no.events) / tot.events 121 XERRUP = sqrt( sum(sigma^2 * no.events^2) ) / tot.events 133 old_init_block = [{}
for _
in self.
_init_str]
141 for bl_line
in bl.split(
'\n')[1:nline + 1]:
142 bl_line_sp = bl_line.split()
143 old_init_block[i][
int(bl_line_sp[3])] = [
149 info_after_subprocess = bl.strip().
split(
'\n')[nline + 1:]
151 logging.info(
'Input file: %s' % self.
input_files[i])
152 for ipr
in sorted(list(old_init_block[i].
keys()), reverse=
True):
154 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
155 %
tuple(old_init_block[i][ipr] + [ipr]))
160 if all([old_init_block[i] == old_init_block[0]
for i
in range(len(self.
_f))]):
163 'All input <init> blocks are identical. Output the same "<init> block.')
168 for ipr
in old_init_block[i]:
171 if ipr
not in new_init_block:
172 new_init_block[ipr] = [0., 0., 0.]
173 new_init_block[ipr][0] += old_init_block[i][ipr][0] * self.
_nevent[i]
174 new_init_block[ipr][1] += old_init_block[i][ipr][1]**2 * self.
_nevent[i]**2
175 new_init_block[ipr][2] =
max(new_init_block[ipr][2], old_init_block[i][ipr][2])
180 +
' ' +
str(len(new_init_block)) +
'\n' 183 for ipr
in sorted(list(new_init_block.keys()), reverse=
True):
185 new_init_block[ipr][0] /= tot_nevent
186 new_init_block[ipr][1] = math.sqrt(new_init_block[ipr][1]) / tot_nevent
187 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
188 %
tuple(new_init_block[ipr] + [ipr]))
191 if len(info_after_subprocess):
202 while not re.search(
'\s*<init(>|\s)', line):
213 while not re.search(
'\s*</init>', line):
221 with open(
'.tmp.lhe',
'w')
as _fwtmp:
226 if re.search(
'\s*</event>', line):
228 if re.search(
'\s*</LesHouchesEvents>', line):
242 event_norm = re.search(
243 r'\s(\w+)\s*=\s*event_norm\s',
245 if event_norm ==
'sum':
247 elif event_norm ==
'average':
249 logging.info((
"MG5 LO LHE with event_norm = %s detected. Will " 250 "recalculate weights in each event block.\n" 251 "Unit weight: %+.7E") % (event_norm, self.
_uwgt))
255 with open(
'.tmp.lhe',
'r') as ftmp: 256 sign = lambda x: -1
if x < 0
else 1
259 if re.search(
'\s*<event.*>', line)
and not re.search(
'\s*<event_num.*>', line):
264 orig_wgt =
float(line.split()[2])
265 fw.write(re.sub(
r'(^\s*\S+\s+\S+\s+)\S+(.+)',
r'\g<1>%+.7E\g<2>' \
267 elif re.search(
'\s*<wgt.*>.*</wgt>', line):
268 addi_wgt_str = re.search(
r'<wgt.*>\s*(\S+)\s*<\/wgt>', line).
group(1)
269 fw.write(line.replace(
270 addi_wgt_str,
'%+.7E' % (
float(addi_wgt_str) / orig_wgt * self.
_uwgt)))
275 with open(
'.tmp.lhe',
'r') as ftmp: 278 fw.write(
'</LesHouchesEvents>\n')
279 os.remove(
'.tmp.lhe')
283 """Use the merger script dedicated for MG5 LO LHEs, as introduced in 284 https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl 287 def __init__(self, input_files, output_file, **kwargs):
288 super(MG5LOLHEMerger, self).
__init__(input_files, output_file)
290 'https://raw.githubusercontent.com/cms-sw/genproductions/5c1e865a6fbe3a762a28363835d9a804c9cf0dbe/bin/MadGraph5_aMCatNLO/Utilities/merge.pl' 294 (
'Use the merger script in genproductions dedicated for ' 295 'MadGraph5-produced LHEs'))
296 os.system(
'curl -s -L %s | perl - %s %s.gz banner.txt' \
299 os.system(
'rm banner.txt')
303 """Use the external mergeLheFiles.cpp file to merge LHE files, as introduced in 304 https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns 307 def __init__(self, input_files, output_file, **kwargs):
308 super(ExternalCppLHEMerger, self).
__init__(input_files, output_file)
310 'https://twiki.cern.ch/twiki/bin/viewfile/CMSPublic/SWGuideSubgroupMC?filename=mergeLheFiles.cpp;rev=2' 314 (
'Use the external mergeLheFiles.cpp file to merge LHE files.'))
316 with open(
'mergeLheFiles.cpp')
as f:
317 script_str = f.read()
318 with open(
'mergeLheFiles.cpp',
'w')
as fw:
319 fw.write(script_str.replace(
'/tmp/covarell/out.lhe', self.
output_file))
320 with open(
'input_files.txt',
'w')
as fw:
323 os.system(
'g++ -Wall -o mergeLheFiles mergeLheFiles.cpp')
324 os.system(
'./mergeLheFiles input_files.txt')
325 os.system(
'rm mergeLheFiles* input_files.txt')
329 """Main routine of the script. 332 - `argv`: arguments passed to the main routine 338 parser = argparse.ArgumentParser(
339 description=(
"A universal script that merges multiple LHE files for all possible conditions and in the most " 341 "A detailed description of the merging step (in the default mode):\n" 343 " a. assert consistency of the headers (allow difference for the info of e.g. #event, seed);\n" 344 " b. if not MG LO LHEs, will simply use the header from the first LHE; otherwise, reset the " 345 "<MGGenerationInfo> from the headers by merging the #event & xsec info;\n" 346 " 2. Init block: if all <init> blocks are the same, use the same as output; otherwise (the MG LO " 347 "case), merge them by recalculating the # of subprocess (LRPUP) and XSECUP, XERRUP, XMAXUP per " 349 " 3. Event block: concatenate all event blocks. If for MG LO LHEs, recalculate the per-event " 350 "XWGTUP and all <wgt> tags based on the new XSECUP, #event, and 'event_norm' read from the MG " 352 "For further development of this script please always validate the merging result on the test " 353 "routines: https://github.com/colizz/mergelhe_validate\n" 355 " mergeLHE.py -i 'thread*/*.lhe,another_file/another.lhe' -o output.lhe"),
356 formatter_class=argparse.RawTextHelpFormatter)
357 parser.add_argument(
"-i",
"--input-files", type=str,
358 help=
"Input LHE file paths separated by commas. Shell-type wildcards are supported.")
359 parser.add_argument(
"-o",
"--output-file",
360 default=
'output.lhe', type=str,
361 help=
"Output LHE file path.")
362 parser.add_argument(
"--force-mglo-merger", action=
'store_true',
363 help=(
"Force to use the merger script dedicated for MG5 LO LHEs, as introduced in " 364 "https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl"))
365 parser.add_argument(
"--force-cpp-merger", action=
'store_true',
366 help=(
"Force to use the external mergeLheFiles.cpp file to merge LHE files, as introduced in " 367 "https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns"))
368 parser.add_argument(
"-b",
"--bypass-check", action=
'store_true',
369 help=(
"Bypass the compatibility check for the headers. If true, the header and init block " 370 "will be just a duplicate from the first input file, and events are concatenated without " 372 parser.add_argument(
"-n",
"--number-events", action=
'store_true',
373 help=(
"Add a tag to number each lhe event. Needed for Herwig to find correct lhe events"))
374 parser.add_argument(
"--debug", action=
'store_true',
375 help=
"Use the debug mode.")
376 args = parser.parse_args(argv)
379 format=
'[%(levelname)s] %(message)s',
380 level=logging.INFO
if not args.debug
else DEBUG)
381 logging.info(
'>>> launch mergeLHE.py in %s' % os.path.abspath(os.getcwd()))
384 assert len(args.input_files), \
385 (
'Please specify your input LHE files by -i/--input-files. ' 386 'Run \'mergeLHE.py -h\' for details.')
388 for path
in args.input_files.split(
','):
389 find_files = glob.glob(path)
390 if len(find_files) == 0:
391 logging.info(
'Warning: cannot find files in %s' % path)
392 input_files += find_files
394 logging.info(
'>>> Merge %d files: [%s]' % (len(input_files),
', '.
join(input_files)))
395 logging.info(
'>>> Write to output: %s ' % args.output_file)
397 if not os.path.exists(os.path.dirname(os.path.realpath(args.output_file))):
398 os.makedirs(os.path.dirname(os.path.realpath(args.output_file)))
400 if args.number_events:
402 for input_file
in input_files:
406 assert len(input_files) > 0,
'Input LHE files should be more than 0.' 407 if len(input_files) == 1:
408 logging.warning(
'Input LHE only has 1 file. Will copy this file to the destination.')
410 shutil.copy(input_files[0], args.output_file)
412 assert [args.force_mglo_merger, args.force_cpp_merger].
count(
True) <= 1, \
413 "Can only specify at most one from --force-mglo-merger or --force-cpp-merger." 416 if args.force_mglo_merger:
418 elif args.force_cpp_merger:
422 input_files, args.output_file, bypass_check=args.bypass_check)
428 if __name__==
"__main__":
def __init__(self, input_files, output_file)
def merge_init_blocks(self)
bool any(const std::vector< T > &v, const T &what)
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const &tracks, int32_t i)
def check_header_compatibility(self)
def replace(string, replacements)
def __init__(self, input_files, output_file, kwargs)
def number_events(input_file, output_file=None, offset=0)
def split(sequence, size)
def __init__(self, input_files, output_file, kwargs)
static std::string join(char **cmd)
def __init__(self, input_files, output_file, kwargs)
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def file_iterator(self, path)