3 from __future__
import print_function
16 """Base class of the LHE merge schemes""" 23 """Output the merged LHE""" 26 class DefaultLHEMerger(BaseLHEMerger):
27 """Default LHE merge scheme that copies the header of the first LHE file, 28 merges and outputs the init block, then concatenates all event blocks.""" 30 def __init__(self, input_files, output_file, **kwargs):
31 super(DefaultLHEMerger, self).
__init__(input_files, output_file)
44 """Line-by-line iterator of a txt file""" 45 with open(path,
'r') as f: 50 """Check if all headers for input files are consistent.""" 55 inconsistent_error_info = (
"Incompatibility found in LHE headers: %s. " 56 "Use -b/--bypass-check to bypass the check.")
58 'nevent',
'numevts',
'iseed',
'Seed',
'Random',
'.log',
'.dat',
'.lhe',
59 'Number of Events',
'Integrated weight' 64 logging.debug(
'header line number: %s' \
68 ), inconsistent_error_info %
"line number does not match" 71 if any([k
in line_zip[0]
for k
in allow_diff_keys]):
72 logging.debug(
'Captured \'%s\', we allow difference in this line' % line_zip[0])
74 if not all([line_zip[0] == line
for line
in line_zip]):
76 for i, line
in enumerate(line_zip):
77 inconsistent_lines_set[i].
add(line)
79 assert all([inconsistent_lines_set[0] == lset
for lset
in inconsistent_lines_set]), \
80 inconsistent_error_info % (
'{' +
', '.
join(inconsistent_lines_set[0]) +
'}')
83 """Merge the headers of input LHEs. Need special handle for the MG5 LO case.""" 90 (
r"<MGGenerationInfo>\s+#\s*Number of Events\s*\:\s*(\S+)\s+" 91 r"#\s*Integrated weight \(pb\)\s*\:\s*(\S+)\s+<\/MGGenerationInfo>"),
96 [
float(info.group(2)) * nevt
for info, nevt
in zip(match_geninfo, self.
_nevent)]
98 geninfo_combined = (
"<MGGenerationInfo>\n" 99 "# Number of Events : %d\n" 100 "# Integrated weight (pb) : %.10f\n</MGGenerationInfo>") \
102 logging.info(
'Detected: MG5 LO LHEs. Input <MGGenerationInfo>:\n\tnevt\txsec')
103 for info, nevt
in zip(match_geninfo, self.
_nevent):
104 logging.info(
'\t%d\t%.10f' % (nevt,
float(info.group(2))))
105 logging.info(
'Combined <MGGenerationInfo>:\n\t%d\t%.10f' \
109 return header_combined
116 """If all <init> blocks are identical, return the same <init> block 117 (in the case of Powheg LHEs); otherwise, calculate the output <init> 118 blocks by merging the input blocks info using formula (same with the 119 MG5LOLHEMerger scheme): 120 XSECUP = sum(xsecup * no.events) / tot.events 121 XERRUP = sqrt( sum(sigma^2 * no.events^2) ) / tot.events 133 old_init_block = [{}
for _
in self.
_init_str]
141 for bl_line
in bl.split(
'\n')[1:nline + 1]:
142 bl_line_sp = bl_line.split()
143 old_init_block[i][
int(bl_line_sp[3])] = [
149 info_after_subprocess = bl.strip().
split(
'\n')[nline + 1:]
151 logging.info(
'Input file: %s' % self.
input_files[i])
152 for ipr
in sorted(
list(old_init_block[i].
keys()), reverse=
True):
154 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
155 % tuple(old_init_block[i][ipr] + [ipr]))
160 if all([old_init_block[i] == old_init_block[0]
for i
in range(len(self.
_f))]):
163 'All input <init> blocks are identical. Output the same "<init> block.')
167 for i
in range(len(self.
_f)):
168 for ipr
in old_init_block[i]:
171 if ipr
not in new_init_block:
172 new_init_block[ipr] = [0., 0., 0.]
173 new_init_block[ipr][0] += old_init_block[i][ipr][0] * self.
_nevent[i]
174 new_init_block[ipr][1] += old_init_block[i][ipr][1]**2 * self.
_nevent[i]**2
175 new_init_block[ipr][2] =
max(new_init_block[ipr][2], old_init_block[i][ipr][2])
176 tot_nevent = sum([self.
_nevent[i]
for i
in range(len(self.
_f))])
180 +
' ' +
str(len(new_init_block)) +
'\n' 183 for ipr
in sorted(
list(new_init_block.keys()), reverse=
True):
185 new_init_block[ipr][0] /= tot_nevent
186 new_init_block[ipr][1] = math.sqrt(new_init_block[ipr][1]) / tot_nevent
187 logging.info(
' xsecup, xerrup, xmaxup, lprup: %.6E, %.6E, %.6E, %d' \
188 % tuple(new_init_block[ipr] + [ipr]))
189 self.
_merged_init_str +=
'%.6E %.6E %.6E %d\n' % tuple(new_init_block[ipr] + [ipr])
191 if len(info_after_subprocess):
199 for i
in range(len(self.
_f)):
202 while not re.search(
'\s*<init(>|\s)', line):
206 self._header_str.append(
''.
join(header))
210 for i
in range(len(self.
_f)):
213 while not re.search(
'\s*</init>', line):
217 self._init_str.append(
''.
join(init))
221 with open(
'.tmp.lhe',
'w')
as _fwtmp:
222 for i
in range(len(self.
_f)):
226 if re.search(
'\s*</event>', line):
228 if re.search(
'\s*</LesHouchesEvents>', line):
231 self._nevent.append(nevent)
242 event_norm = re.search(
243 r'\s(\w+)\s*=\s*event_norm\s',
245 if event_norm ==
'sum':
247 elif event_norm ==
'average':
249 logging.info((
"MG5 LO LHE with event_norm = %s detected. Will " 250 "recalculate weights in each event block.\n" 251 "Unit weight: %+.7E") % (event_norm, self.
_uwgt))
255 with open(
'.tmp.lhe',
'r') as ftmp: 256 sign = lambda x: -1
if x < 0
else 1
259 if re.search(
'\s*<event.*>', line)
and not re.search(
'\s*<event_num.*>', line):
264 orig_wgt =
float(line.split()[2])
265 fw.write(re.sub(
r'(^\s*\S+\s+\S+\s+)\S+(.+)',
r'\g<1>%+.7E\g<2>' \
267 elif re.search(
'\s*<wgt.*>.*</wgt>', line):
268 addi_wgt_str = re.search(
r'<wgt.*>\s*(\S+)\s*<\/wgt>', line).
group(1)
269 fw.write(line.replace(
270 addi_wgt_str,
'%+.7E' % (
float(addi_wgt_str) / orig_wgt * self.
_uwgt)))
275 with open(
'.tmp.lhe',
'r') as ftmp: 278 fw.write(
'</LesHouchesEvents>\n')
279 os.remove(
'.tmp.lhe')
283 """Use the merger script dedicated for MG5 LO LHEs, as introduced in 284 https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl 287 def __init__(self, input_files, output_file, **kwargs):
288 super(MG5LOLHEMerger, self).
__init__(input_files, output_file)
290 'https://raw.githubusercontent.com/cms-sw/genproductions/5c1e865a6fbe3a762a28363835d9a804c9cf0dbe/bin/MadGraph5_aMCatNLO/Utilities/merge.pl' 294 (
'Use the merger script in genproductions dedicated for ' 295 'MadGraph5-produced LHEs'))
296 os.system(
'curl -s -L %s | perl - %s %s.gz banner.txt' \
299 os.system(
'rm banner.txt')
303 """Use the external mergeLheFiles.cpp file to merge LHE files, as introduced in 304 https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns 307 def __init__(self, input_files, output_file, **kwargs):
308 super(ExternalCppLHEMerger, self).
__init__(input_files, output_file)
310 'https://twiki.cern.ch/twiki/bin/viewfile/CMSPublic/SWGuideSubgroupMC?filename=mergeLheFiles.cpp;rev=2' 314 (
'Use the external mergeLheFiles.cpp file to merge LHE files.'))
316 with open(
'mergeLheFiles.cpp')
as f:
317 script_str = f.read()
318 with open(
'mergeLheFiles.cpp',
'w')
as fw:
319 fw.write(script_str.replace(
'/tmp/covarell/out.lhe', self.
output_file))
320 with open(
'input_files.txt',
'w')
as fw:
323 os.system(
'g++ -Wall -o mergeLheFiles mergeLheFiles.cpp')
324 os.system(
'./mergeLheFiles input_files.txt')
325 os.system(
'rm mergeLheFiles* input_files.txt')
329 """Main routine of the script. 332 - `argv`: arguments passed to the main routine 338 parser = argparse.ArgumentParser(
339 description=(
"A universal script that merges multiple LHE files for all possible conditions and in the most " 341 "A detailed description of the merging step (in the default mode):\n" 343 " a. assert consistency of the headers (allow difference for the info of e.g. #event, seed);\n" 344 " b. if not MG LO LHEs, will simply use the header from the first LHE; otherwise, reset the " 345 "<MGGenerationInfo> from the headers by merging the #event & xsec info;\n" 346 " 2. Init block: if all <init> blocks are the same, use the same as output; otherwise (the MG LO " 347 "case), merge them by recalculating the # of subprocess (LRPUP) and XSECUP, XERRUP, XMAXUP per " 349 " 3. Event block: concatenate all event blocks. If for MG LO LHEs, recalculate the per-event " 350 "XWGTUP and all <wgt> tags based on the new XSECUP, #event, and 'event_norm' read from the MG " 352 "For further development of this script please always validate the merging result on the test " 353 "routines: https://github.com/colizz/mergelhe_validate\n" 355 " mergeLHE.py -i 'thread*/*.lhe,another_file/another.lhe' -o output.lhe"),
356 formatter_class=argparse.RawTextHelpFormatter)
357 parser.add_argument(
"-i",
"--input-files", type=str,
358 help=
"Input LHE file paths separated by commas. Shell-type wildcards are supported.")
359 parser.add_argument(
"-o",
"--output-file",
360 default=
'output.lhe', type=str,
361 help=
"Output LHE file path.")
362 parser.add_argument(
"--force-mglo-merger", action=
'store_true',
363 help=(
"Force to use the merger script dedicated for MG5 LO LHEs, as introduced in " 364 "https://github.com/cms-sw/genproductions/blob/master/bin/MadGraph5_aMCatNLO/Utilities/merge.pl"))
365 parser.add_argument(
"--force-cpp-merger", action=
'store_true',
366 help=(
"Force to use the external mergeLheFiles.cpp file to merge LHE files, as introduced in " 367 "https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideSubgroupMC#1_2_Using_pLHE_campaigns"))
368 parser.add_argument(
"-b",
"--bypass-check", action=
'store_true',
369 help=(
"Bypass the compatibility check for the headers. If true, the header and init block " 370 "will be just a duplicate from the first input file, and events are concatenated without " 372 parser.add_argument(
"-n",
"--number-events", action=
'store_true',
373 help=(
"Add a tag to number each lhe event. Needed for Herwig to find correct lhe events"))
374 parser.add_argument(
"--debug", action=
'store_true',
375 help=
"Use the debug mode.")
376 args = parser.parse_args(argv)
379 format=
'[%(levelname)s] %(message)s',
380 level=logging.INFO
if not args.debug
else DEBUG)
381 logging.info(
'>>> launch mergeLHE.py in %s' % os.path.abspath(os.getcwd()))
384 assert len(args.input_files), \
385 (
'Please specify your input LHE files by -i/--input-files. ' 386 'Run \'mergeLHE.py -h\' for details.')
388 for path
in args.input_files.split(
','):
389 find_files = glob.glob(path)
390 if len(find_files) == 0:
391 logging.info(
'Warning: cannot find files in %s' % path)
392 input_files += find_files
394 logging.info(
'>>> Merge %d files: [%s]' % (len(input_files),
', '.
join(input_files)))
395 logging.info(
'>>> Write to output: %s ' % args.output_file)
397 if not os.path.exists(os.path.dirname(os.path.realpath(args.output_file))):
398 os.makedirs(os.path.dirname(os.path.realpath(args.output_file)))
400 if args.number_events:
402 for input_file
in input_files:
405 assert len(input_files) > 0,
'Input LHE files should be more than 0.' 406 if len(input_files) == 1:
407 logging.warning(
'Input LHE only has 1 file. Will copy this file to the destination.')
409 shutil.copy(input_files[0], args.output_file)
411 assert [args.force_mglo_merger, args.force_cpp_merger].
count(
True) <= 1, \
412 "Can only specify at most one from --force-mglo-merger or --force-cpp-merger." 415 if args.force_mglo_merger:
417 elif args.force_cpp_merger:
421 input_files, args.output_file, bypass_check=args.bypass_check)
427 if __name__==
"__main__":
def __init__(self, input_files, output_file)
def merge_init_blocks(self)
bool any(const std::vector< T > &v, const T &what)
def check_header_compatibility(self)
def replace(string, replacements)
def __init__(self, input_files, output_file, kwargs)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def number_events(input_file, output_file=None, offset=0)
def __init__(self, input_files, output_file, kwargs)
void add(std::map< std::string, TH1 * > &h, TH1 *hist)
static std::string join(char **cmd)
def __init__(self, input_files, output_file, kwargs)
def file_iterator(self, path)
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run