CMS 3D CMS Logo

mps_parse_pedechi2hist.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 # Original author: Joerg Behr
4 # Translation from Perl to Python: Gregor Mittag
5 #
6 # This script reads the histogram file produced by Pede and it extracts the plot
7 # showing the average chi2/ndf per Mille binary number. After reading the MPS
8 # database, for which the file name has to be provided, an output file called
9 # chi2pedehis.txt is produced where the first column corresponds to the
10 # associated name, the second column corresponds to the Mille binary number, and
11 # the last column is equal to <chi2/ndf>. As a further argument this script
12 # expects the file name of the Pede histogram file -- usually millepede.his. The
13 # last required argument represents the location of the Python config which was
14 # used by CMSSW.
15 #
16 # Use createChi2ndfplot.C to plot the output of this script.
17 
18 from __future__ import print_function
19 import os
20 import sys
21 import re
22 import argparse
23 
24 import Alignment.MillePedeAlignmentAlgorithm.mpslib.tools as mps_tools
25 import Alignment.MillePedeAlignmentAlgorithm.mpslib.Mpslibclass as mpslib
26 
27 
28 
def main(argv=None):
    """Main routine of the script.

    Parses the command line, reads the MPS database, the pede job
    configuration and the pede histogram file, and writes 'chi2pedehis.txt'
    into the current working directory.  Each output line has the form
    "<name> <binary number, zero-padded to 3 digits> <chi2/ndf>".

    Arguments:
    - `argv`: arguments passed to the main routine; `sys.argv[1:]` is used
              if it is `None`

    The script exits with status 1 if one of the input files does not exist,
    if the number of histogram bins differs from the number of used binaries,
    or if a used binary has no entry in the MPS database.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Analysis pede histogram file")
    parser.add_argument("-d", "--mps-db", dest="mps_db", required=True,
                        metavar="PATH", help="MPS database file ('mps.db')")
    parser.add_argument("--his", dest="his_file", required=True,
                        metavar="PATH", help="pede histogram file")
    parser.add_argument("-c", "--cfg", dest="cfg", metavar="PATH", required=True,
                        help="python configuration file of pede job")
    parser.add_argument("-b", "--no-binary-check", dest="no_binary_check",
                        default=False, action="store_true",
                        help=("skip check for existing binaries "
                              "(possibly needed if used interactively)"))
    args = parser.parse_args(argv)

    # all three inputs must exist before any of them is parsed
    for input_file in (args.mps_db, args.his_file, args.cfg):
        if not os.path.exists(input_file):
            print("Could not find input file:", input_file)
            sys.exit(1)

    ids, names = get_all_ids_names(args.mps_db)
    used_binaries = get_used_binaries(args.cfg, args.no_binary_check)
    his_data = get_his_data(args.his_file)

    # the i-th histogram bin is matched to the i-th used binary, hence both
    # lists need to have the same length
    if len(his_data) != len(used_binaries):
        print("The number of used binaries is", len(used_binaries), end=' ')
        print("whereas in contrast, however, the <chi2/ndf> histogram in Pede has", end=' ')
        print(len(his_data), "bins (Pede version >= rev92 might help if #bins < #binaries).", end=' ')
        print("Exiting.")
        sys.exit(1)

    # Build all output lines before opening the output file, such that a
    # binary which is unknown to the database does not leave a truncated
    # 'chi2pedehis.txt' behind.
    lines = []
    for binary, chi2_ndf in zip(used_binaries, his_data):
        try:
            index = ids.index(binary)
        except ValueError:
            print("Could not find Mille binary number", binary,
                  "in MPS database:", args.mps_db)
            sys.exit(1)
        name = names[index]
        lines.append(" ".join([name, "{:03d}".format(binary), chi2_ndf])+"\n")

    with open("chi2pedehis.txt", "w") as f:
        f.writelines(lines)
74 
75 
76 
def get_all_ids_names(mps_db):
    """Read the MPS database and return the mille job IDs and their names.

    Both returned lists are cut to the first `nJobs` entries of the job
    database, i.e. `JOBNUMBER` for the IDs and `JOBSP3` for the names.

    Arguments:
    - `mps_db`: path to the MPS database file
    """

    database = mpslib.jobdatabase()
    database.read_db(mps_db)

    n_jobs = database.nJobs
    return database.JOBNUMBER[:n_jobs], database.JOBSP3[:n_jobs]
91 
92 
def get_used_binaries(cfg, no_binary_check):
    """Return the IDs of the Mille binaries used by the pede job as integers.

    The binary file names are taken from the `mergeBinaryFiles` setting of
    the alignment algorithm configuration; the ID is the number embedded in
    a name of the form 'milleBinary<number>.dat'.

    Arguments:
    - `cfg`: python config used to run the pede job
    - `no_binary_check`: if 'True' a check for file existence is skipped
    """

    process = mps_tools.get_process_object(cfg)
    file_names = process.AlignmentProducer.algoConfig.mergeBinaryFiles

    if not no_binary_check:
        # the binaries are looked up next to the config file, i.e. this only
        # works if the config was run from the directory it is stored in
        cfg_dir = os.path.dirname(cfg)
        file_names = [file_name for file_name in file_names
                      if os.path.exists(os.path.join(cfg_dir, file_name))]

    binary_ids = []
    for file_name in file_names:
        number = re.sub(r"milleBinary(\d+)\.dat", r"\1", file_name)
        binary_ids.append(int(number))

    return binary_ids
115 
116 
def get_his_data(his_file):
    """Parse the pede histogram file.

    Scans the file for the line announcing the plot
    "final <Chi^2/Ndf> from accepted local fits vs file number" and collects
    the last whitespace-separated token of every data line from there up to
    the next line containing "end of xy-data".  Lines without any digit and
    lines containing a lower-case letter (e.g. the plot title itself) are not
    treated as data lines.

    Arguments:
    - `his_file`: pede histogram file

    Returns a list of strings (the values are not converted to numbers); the
    list is empty if the plot is not found in the file.
    """

    chi2_start = r"final <Chi^2/Ndf> from accepted local fits vs file number"
    chi2_end = r"end of xy-data"

    # compiled once since they are applied to every line of the plot section
    has_digit = re.compile(r"\d")
    has_lowercase = re.compile(r"[a-z]")

    his_data = []
    with open(his_file, "r") as his:
        found_chi2_start = False

        for line in his:
            if chi2_start in line:
                found_chi2_start = True
            if not found_chi2_start:
                continue

            # the end marker has to be checked first, because it contains
            # lower-case letters and would otherwise just be skipped
            if chi2_end in line:
                break
            if not has_digit.search(line):
                continue
            if has_lowercase.search(line):
                continue
            his_data.append(line.split()[-1])

    return his_data
141 
142 
143 
# Run the command line interface only if the file is executed as a script.
144 if __name__ == "__main__":
145  main()
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
mps_parse_pedechi2hist.get_used_binaries
def get_used_binaries(cfg, no_binary_check)
Definition: mps_parse_pedechi2hist.py:93
mps_parse_pedechi2hist.main
def main(argv=None)
Definition: mps_parse_pedechi2hist.py:29
mps_parse_pedechi2hist.get_all_ids_names
def get_all_ids_names(mps_db)
Definition: mps_parse_pedechi2hist.py:77
mps_parse_pedechi2hist.get_his_data
def get_his_data(his_file)
Definition: mps_parse_pedechi2hist.py:117
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
createfilelist.int
int
Definition: createfilelist.py:10
main
Definition: main.py:1
format