CMS 3D CMS Logo

dqm-mbProfile.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 
3 import os
4 import collections
5 import logging
6 import resource
7 import time
8 import argparse
9 import subprocess
10 import signal
11 import json
12 import inspect
13 import shutil
14 
# Layout shared by every log record: timestamp, logger name, level, message.
LOG_FORMAT = '%(asctime)s: %(name)-20s - %(levelname)-8s - %(message)s'
logging.basicConfig(format=LOG_FORMAT)

# Module-wide logger; INFO by default, the -q command line flag raises it
# to WARNING.
log = logging.getLogger("mbProfile")
log.setLevel(logging.INFO)
19 
def read_procfs(ppath, only_ppid=True):
    """Take a snapshot of one process from its procfs directory.

    Args:
        ppath: Path of the process directory, e.g. ``/proc/1234``.
        only_ppid: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A dict holding the stripped contents of ``statm`` and ``stat``, the
        ``cmdline`` with NUL separators turned into spaces, the parsed
        ``status`` fields under ``"status"``, and integer ``"pid"`` and
        ``"parent_pid"`` entries. ``None`` if anything could not be read or
        parsed (a warning with the traceback is logged in that case).
    """
    def read(f):
        with open(os.path.join(ppath, f)) as fd:
            return fd.read()

    def read_status():
        st = {}

        with open(os.path.join(ppath, "status")) as fd:
            for line in fd:
                key, sep, value = line.partition(":")
                if not sep:
                    # blank or malformed line: skip it rather than
                    # discarding the whole process entry
                    continue

                st[key] = value.strip()

        return st

    try:
        dct = {}

        dct["statm"] = read("statm").strip()
        dct["stat"] = read("stat").strip()
        dct["cmdline"] = read("cmdline").strip().replace("\0", " ")

        status = read_status()
        dct["status"] = status
        dct["pid"] = int(status["Pid"])
        dct["parent_pid"] = int(status["PPid"])

        return dct
    except Exception:
        # Processes may disappear while they are being read; report and
        # carry on. KeyboardInterrupt / SystemExit are deliberately not
        # caught here.
        log.warning("Exception in read_procfs.", exc_info=True)
        return None
55 
def build_process_list():
    """Yield a snapshot dict for every process currently listed in /proc.

    Only directory entries whose name is all digits are considered.
    Entries for which read_procfs() returns nothing are skipped.
    """
    # NOTE(review): the ``def`` line was missing from the listing; name and
    # empty signature are taken from the call in get_children() - confirm.
    for f in os.listdir("/proc/"):
        if not f.isdigit():
            continue

        proc = read_procfs(os.path.join("/proc", f))
        if proc:
            yield proc
64 
def get_children(ppid, processes=None):
    """ Select all processes which are descendant from ppid (exclusive).

    Args:
        ppid: Pid whose descendants are wanted.
        processes: Optional iterable of process dicts carrying at least
            ``"pid"`` and ``"parent_pid"``. When omitted, a fresh snapshot is
            taken with build_process_list().

    Returns:
        The descendant process dicts in breadth-first order. Each returned
        dict gains a ``"children"`` list with the pids of its direct
        children. An empty list if ``ppid`` is None or not in the snapshot.
    """
    if processes is None:
        processes = build_process_list()

    pid_dct = {}
    for proc in processes:
        proc["_children"] = []
        pid_dct[proc["pid"]] = proc

    # fill in children array
    for pid, proc in pid_dct.items():
        parent = pid_dct.get(proc["parent_pid"])
        if parent is not None:
            parent["_children"].append(pid)

    # now just walk down the tree
    if ppid is None or ppid not in pid_dct:
        # process has quit, we exit
        return []

    accepted = []
    visited = set()
    to_accept = collections.deque([ppid])

    while to_accept:
        pid = to_accept.popleft()

        # A snapshot can contain a loop (race while reading procfs); never
        # expand the same pid twice.
        if pid in visited:
            continue
        visited.add(pid)

        head = pid_dct[pid]

        # do not include the monitoring pid
        if pid != ppid:
            accepted.append(head)

        head["children"] = head.pop("_children")
        to_accept.extend(head["children"])

    return accepted
103 
# NOTE(review): the ``class`` line was missing from the listing; the name is
# taken from the ``Profile(args)`` call in run_and_monitor() - confirm.
class Profile(object):
    """Periodic resource-usage sampler for this process' descendants.

    Every update() call records the children rusage counters and a procfs
    snapshot of all descendant processes, and appends the result as one
    JSON line to the output file when one is configured.
    """

    def __init__(self, args):
        """Set up the sampler and take the first sample.

        Args:
            args: Parsed command line; ``args.file`` is the output filename
                (falsy for no file output).
        """
        self.time = time.time()
        self.final = False
        self.pid = None
        self.known_pids = {}

        self.ru = {}
        self.ru_diff = {}

        # rusage at the first sample; later samples are reported relative
        # to it
        self._offset_ru = None
        self._args = args

        if self._args.file:
            self._file = open(self._args.file, "w")
        else:
            self._file = None

        self.update()

    def update_ru(self):
        """Refresh ``self.ru`` and ``self.ru_diff`` from RUSAGE_CHILDREN."""
        fields_to_subtract = (
            "ru_utime", "ru_stime", "ru_maxrss", "ru_minflt", "ru_majflt", "ru_nswap",
            "ru_inblock", "ru_oublock", "ru_msgsnd", "ru_msgrcv", "ru_nsignals", "ru_nvcsw", "ru_nivcsw",
        )

        rusage = resource.getrusage(resource.RUSAGE_CHILDREN)
        self.ru = rusage

        if self._offset_ru is None:
            self._offset_ru = rusage

        for field in fields_to_subtract:
            current = getattr(self.ru, field)
            base = getattr(self._offset_ru, field)

            self.ru_diff[field] = current - base

    # this is taken from: http://github.com/pixelb/scripts/commits/master/scripts/ps_mem.py
    def read_smaps(self, proc_dict):
        """Add summed smaps figures (in bytes) to ``proc_dict``.

        Reads ``/proc/<pid>/smaps`` for ``proc_dict["pid"]`` and stores
        ``smaps_shared``, ``smaps_private`` and ``smaps_pss``; the values
        read from the file are multiplied by 1024.
        """
        Private, Shared, Pss = 0, 0, 0

        fp = os.path.join("/proc/%d" % proc_dict["pid"], "smaps")
        with open(fp) as fd:
            for line in fd:
                if line.startswith("Shared"):
                    Shared += int(line.split()[1])
                elif line.startswith("Private"):
                    Private += int(line.split()[1])
                elif line.startswith("Pss:"):
                    # match the exact field: a bare "Pss" prefix would also
                    # add any "Pss_*" breakdown lines on top of the total
                    Pss += int(line.split()[1])

        proc_dict["smaps_shared"] = Shared * 1024
        proc_dict["smaps_private"] = Private * 1024
        proc_dict["smaps_pss"] = Pss * 1024

    def update_proc(self):
        """Refresh ``self.known_pids`` from the current descendants."""
        procs = get_children(os.getpid())

        # we can only do it here, permission-wise
        # ie only for owned processes
        for proc in procs:
            try:
                self.read_smaps(proc)
            except Exception:
                log.warning("Exception in read_smaps.", exc_info=True)

        # we need to mark not-running ones as such
        stopped = set(self.known_pids.keys())
        for proc in procs:
            proc["running"] = True

            pid = proc["pid"]
            self.known_pids[pid] = proc
            stopped.discard(pid)

        for pid in stopped:
            self.known_pids[pid]["running"] = False

    def update(self):
        """Take one sample and, if a file is configured, append it."""
        self.time = time.time()

        self.update_ru()
        self.update_proc()

        if self._file:
            # one JSON document per line, flushed so readers see it promptly
            json.dump(self.to_dict(), self._file)
            self._file.write("\n")
            self._file.flush()

            log.info("Written profile to: %s, took=%.03f", self._args.file, time.time() - self.time)

    def to_dict(self):
        """Return the current sample as an ordered, JSON-serialisable dict."""
        dct = collections.OrderedDict()
        dct['time'] = self.time
        dct['pid'] = self.pid
        dct['final'] = self.final

        dct['ru_diff'] = dict(self.ru_diff)
        dct['ru'] = {k: v for k, v in inspect.getmembers(self.ru) if k.startswith('ru_')}
        dct['known_pids'] = dict(self.known_pids)
        return dct

    def finish(self):
        """Take the final sample, then close the file or log ``ru_diff``."""
        self.final = True
        self.update()

        if self._file:
            self._file.close()
            self._file = None
        else:
            log.info("ru_diff: %s", self.ru_diff)
218 
219 
# Seconds between two samples; the command line entry point overwrites it
# with the -i value.
ALARM_TIMER = 1
# Object whose update() is called on every alarm; set by run_and_monitor().
ALARM_P_OBJECT = None


def handle_alarm(num, frame):
    """SIGALRM handler: take a sample if a profile is set, then re-arm.

    Args:
        num: Signal number (unused).
        frame: Interrupted stack frame (unused).
    """
    if ALARM_P_OBJECT:
        ALARM_P_OBJECT.update()

    signal.alarm(ALARM_TIMER)
228 
def run_and_monitor(args):
    """Run ``args.pargs`` as a child process and profile it until it exits.

    A Profile is created, the command is started, and a SIGALRM timer of
    ALARM_TIMER seconds drives the periodic samples. After the child exits
    the final sample is taken with Profile.finish().

    Args:
        args: Parsed command line; ``args.pargs`` is the command to run.
    """
    global ALARM_P_OBJECT

    profile = Profile(args)

    proc = subprocess.Popen(args.pargs)
    profile.pid = proc.pid

    ALARM_P_OBJECT = profile

    signal.signal(signal.SIGALRM, handle_alarm)
    signal.alarm(ALARM_TIMER)

    try:
        proc.wait()
    finally:
        # Cancel the timer before the final sample: an alarm firing inside
        # finish() would run a nested update() and interleave two JSON
        # records in the output file.
        signal.alarm(0)

    profile.finish()
243 
def find_and_write_html(p, args):
    """Write the html helper files and ``mbGraph.json`` into directory ``p``.

    ``mbGraph.js`` and ``mbGraph.html`` are copied from the first of
    ``$CMSSW_BASE`` / ``$CMSSW_RELEASE_BASE`` (under
    ``src/DQMServices/Components/data/html``) that has them; a warning is
    logged for a file found in neither. ``mbGraph.json`` records the
    profile file name, the sampling interval and a few environment values.

    Args:
        p: Target directory; an empty string means the current directory.
        args: Parsed command line; ``args.file`` and ``args.i`` are read.
    """
    # create the dir if necessary
    if p:
        os.makedirs(p, exist_ok=True)

    # An unset variable is skipped; os.path.join(None, ...) would raise.
    html_paths = [
        os.path.join(base, "src/DQMServices/Components/data/html")
        for base in (os.getenv("CMSSW_BASE"), os.getenv("CMSSW_RELEASE_BASE"))
        if base
    ]

    def find_file(f):
        fails = []
        for html_dir in html_paths:
            x = os.path.join(html_dir, f)
            if os.path.exists(x):
                return x
            fails.append(x)

        log.warning("Could not find html file: %s (%s)", f, fails)
        return None

    for f in ['mbGraph.js', 'mbGraph.html']:
        target_fn = os.path.join(p, f)
        source_fn = find_file(f)
        if source_fn:
            log.info("Copying %s to %s", source_fn, target_fn)
            shutil.copyfile(source_fn, target_fn)

    # create json file
    target_fn = os.path.join(p, "mbGraph.json")
    log.info("Creating %s", target_fn)
    with open(target_fn, "w") as fp:
        dct = {
            "file": os.path.basename(args.file),
            "interval": args.i,
            "env": {
                "CMSSW_GIT_HASH": os.getenv("CMSSW_GIT_HASH"),
                "CMSSW_RELEASE_BASE": os.getenv("CMSSW_RELEASE_BASE"),
                "SCRAM_ARCH": os.getenv("SCRAM_ARCH"),
            },
        }

        json.dump(dct, fp, indent=2)
287 
288 
# Command line entry point: parse the options, optionally write the html
# helper files, then run the given command under the profiler.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Profile child processes and produce data for rss and such graphs.")
    parser.add_argument("-f", "--file", type=str, default="performance.json", help="Filename to write.", metavar="performance.json")
    parser.add_argument("-i", type=int, help="Time interval between profiles.", default=15)
    parser.add_argument('-q', action='store_true', help="Reduce logging.")
    parser.add_argument('-w', action='store_true', help="Write html helper files for rendering the performance file.")
    parser.add_argument('pargs', nargs=argparse.REMAINDER)

    args = parser.parse_args()

    if not args.pargs:
        parser.print_help()
        # Same effect as sys.exit(-1); ``sys`` is not imported by this file.
        raise SystemExit(-1)
    elif args.pargs[0] == "--":
        # compat with 2.6
        args.pargs = args.pargs[1:]

    # module-level name read by handle_alarm() and run_and_monitor()
    ALARM_TIMER = args.i

    if args.q:
        log.setLevel(logging.WARNING)

    if args.w:
        p = os.path.dirname(args.file)
        find_and_write_html(p, args)

    run_and_monitor(args)
320 
def get_children(ppid)
def replace(string, replacements)
def __init__(self, args)
def read_smaps(self, proc_dict)
def handle_alarm(num, frame)
def build_process_list()
def run_and_monitor(args)
def find_and_write_html(p, args)
def read_procfs(ppath, only_ppid=True)