CMS 3D CMS Logo

LHEReader.cc
Go to the documentation of this file.
1 #include <algorithm>
2 #include <iomanip>
3 #include <iostream>
4 #include <memory>
5 
6 #include <cstdio>
7 #include <cstring>
8 #include <fstream>
9 #include <sstream>
10 #include <string>
11 #include <vector>
12 
13 #include <xercesc/sax2/Attributes.hpp>
14 #include <xercesc/dom/DOM.hpp>
15 
20 
24 
28 
29 #include "XMLUtils.h"
30 
32 
33 namespace lhef {
34 
// Emits one log record under the "fileAction" category through
// edm::LogAbsolute: the current edm::TimeOfDay(), then `msg`, then `fileName`.
// msg:      text placed between the timestamp and the file name.
// fileName: name or URL of the file the message refers to.
// NOTE(review): the embedded listing numbers jump from 36 to 38, so one
// statement of the original body may be absent from this listing - confirm
// against the upstream file.
35  static void logFileAction(char const *msg, std::string const &fileName) {
36  edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay() << msg << fileName;
38  }
39 
41  public:
42  Source() {}
43  virtual ~Source() {}
45  };
46 
48  public:
49  FileSource(const std::string &fileURL) {
50  using namespace edm::storage;
51  auto storage = StorageFactory::get()->open(fileURL, IOFlags::OpenRead);
52 
53  if (!storage)
54  throw cms::Exception("FileOpenError")
55  << "Could not open LHE file \"" << fileURL << "\" for reading" << std::endl;
56 
57  fileStream = std::make_unique<StorageWrap>(std::move(storage));
58  }
59 
60  ~FileSource() override {}
61 
63 
64  private:
65  std::unique_ptr<StorageWrap> fileStream;
66  };
67 
69  public:
71  if (inputs.empty())
72  throw cms::Exception("StreamOpenError") << "Empty LHE file string name \"" << std::endl;
73 
74  std::stringstream *tmpis = new std::stringstream(inputs);
75  fileStream.reset(tmpis);
76  }
77 
78  ~StringSource() override {}
79 
81 
82  private:
83  std::unique_ptr<std::istream> fileStream;
84  };
85 
87  public:
88  typedef std::vector<std::pair<std::string, std::string> > wgt_info;
90  : impl(nullptr),
91  gotObject(kNone),
92  mode(kNone),
93  xmlHeader(nullptr),
94  xmlEvent(nullptr),
95  headerOk(false),
96  npLO(-99),
97  npNLO(-99) {}
98  ~XMLHandler() override {
99  if (xmlHeader)
100  xmlHeader->release();
101  if (xmlEvent)
102  xmlEvent->release();
103  }
104 
105  enum Object { kNone = 0, kHeader, kInit, kComment, kEvent };
106 
107  void reset() {
108  headerOk = false;
109  weightsinevent.clear();
110  gotObject = kNone;
111  mode = kNone;
112  }
113 
// Read-only access to `weightsinevent`, the list of (string, string) pairs
// held by this handler; the list is emptied by reset().
114  const wgt_info &weightInfo() const { return weightsinevent; }
115 
116  protected:
117  void startElement(const XMLCh *const uri,
118  const XMLCh *const localname,
119  const XMLCh *const qname,
120  const Attributes &attributes) override;
121 
122  void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override;
123 
124  void characters(const XMLCh *const chars, const XMLSize_t length) override;
125  void comment(const XMLCh *const chars, const XMLSize_t length) override;
126 
127  private:
128  friend class LHEReader;
129 
130  bool skipEvent = false;
131  std::unique_ptr<DOMImplementation> impl;
135  DOMDocument *xmlHeader;
136  DOMDocument *xmlEvent;
137  std::vector<DOMElement *> xmlNodes, xmlEventNodes;
138  bool headerOk;
139  std::vector<LHERunInfo::Header> headers;
141  int npLO;
142  int npNLO;
143  std::vector<float> scales;
144  int evtnum = -1;
145  };
146 
147  static void attributesToDom(DOMElement *dom, const Attributes &attributes) {
148  for (unsigned int i = 0; i < attributes.getLength(); i++) {
149  const XMLCh *name = attributes.getQName(i);
150  const XMLCh *value = attributes.getValue(i);
151 
152  dom->setAttribute(name, value);
153  }
154  }
155 
156  static void fillHeader(LHERunInfo::Header &header, const char *data, int len = -1) {
157  const char *end = len >= 0 ? (data + len) : nullptr;
158  while (*data && (!end || data < end)) {
159  std::size_t len = std::strcspn(data, "\r\n");
160  if (end && data + len > end)
161  len = end - data;
162  if (data[len] == '\r' && data[len + 1] == '\n')
163  len += 2;
164  else if (data[len])
165  len++;
166  header.addLine(std::string(data, len));
167  data += len;
168  }
169  }
170 
171  void LHEReader::XMLHandler::startElement(const XMLCh *const uri,
172  const XMLCh *const localname,
173  const XMLCh *const qname,
174  const Attributes &attributes) {
175  std::string name((const char *)XMLSimpleStr(qname));
176 
177  if (!headerOk) {
178  if (name != "LesHouchesEvents")
179  throw cms::Exception("InvalidFormat") << "LHE file has invalid header" << std::endl;
180  headerOk = true;
181  return;
182  }
183 
184  if (mode == kHeader) {
185  DOMElement *elem = xmlHeader->createElement(qname);
186  attributesToDom(elem, attributes);
187  xmlNodes.back()->appendChild(elem);
188  xmlNodes.push_back(elem);
189  return;
190  } else if (mode == kEvent) {
191  if (skipEvent) {
192  return;
193  }
194 
195  DOMElement *elem = xmlEvent->createElement(qname);
196  attributesToDom(elem, attributes);
197 
198  //TODO this is a hack (even more than the rest of this class)
199  if (name == "rwgt") {
200  xmlEventNodes[0]->appendChild(elem);
201  } else if (name == "wgt") {
202  xmlEventNodes[1]->appendChild(elem);
203  } else if (name == "scales") {
204  for (XMLSize_t iscale = 0; iscale < attributes.getLength(); ++iscale) {
205  int ipart = 0;
206  const char *scalename = XMLSimpleStr(attributes.getQName(iscale));
207  int nmatch = sscanf(scalename, "pt_clust_%d", &ipart);
208 
209  if (nmatch != 1) {
210  edm::LogError("Generator|LHEInterface") << "invalid attribute in <scales> tag" << std::endl;
211  }
212 
213  float scaleval;
214  const char *scalevalstr = XMLSimpleStr(attributes.getValue(iscale));
215  sscanf(scalevalstr, "%e", &scaleval);
216 
217  scales.push_back(scaleval);
218  }
219  } else if (name == "event_num") {
220  const char *evtnumstr = XMLSimpleStr(attributes.getValue(XMLString::transcode("num")));
221  sscanf(evtnumstr, "%d", &evtnum);
222  }
223  xmlEventNodes.push_back(elem);
224  return;
225  } else if (mode == kInit) {
226  //skip unknown tags in init block as well
227  return;
228  } else if (mode != kNone) {
229  throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
230  }
231 
232  if (name == "header") {
233  if (!impl)
234  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
235 
236  xmlHeader = impl->createDocument(nullptr, qname, nullptr);
237  xmlNodes.resize(1);
238  xmlNodes[0] = xmlHeader->getDocumentElement();
239  mode = kHeader;
240  }
241  if (name == "init") {
242  mode = kInit;
243  } else if (name == "event") {
244  if (!skipEvent) {
245  if (!impl)
246  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
247 
248  if (xmlEvent)
249  xmlEvent->release();
250  xmlEvent = impl->createDocument(nullptr, qname, nullptr);
251  weightsinevent.resize(0);
252  scales.clear();
253 
254  npLO = -99;
255  npNLO = -99;
256  const XMLCh *npLOval = attributes.getValue(XMLString::transcode("npLO"));
257  if (npLOval) {
258  const char *npLOs = XMLSimpleStr(npLOval);
259  sscanf(npLOs, "%d", &npLO);
260  }
261  const XMLCh *npNLOval = attributes.getValue(XMLString::transcode("npNLO"));
262  if (npNLOval) {
263  const char *npNLOs = XMLSimpleStr(npNLOval);
264  sscanf(npNLOs, "%d", &npNLO);
265  }
266 
267  xmlEventNodes.resize(1);
268  xmlEventNodes[0] = xmlEvent->getDocumentElement();
269  }
270  mode = kEvent;
271  }
272 
273  if (mode == kNone)
274  throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
275 
276  buffer.clear();
277  }
278 
// SAX callback invoked for every closing tag.
//
// uri, localname: unused.
// qname:          qualified tag name of the element being closed.
//
// Nothing happens while no block is open (mode is kNone). Otherwise:
//  - a nested element inside <header> only pops one level off xmlNodes;
//  - the closing </header> serializes each child of the header root and
//    stores the result in `headers`, then releases the header document;
//  - the closing </event> collects weights and the event text;
//  - any other closing tag inside an event is ignored.
// When a block is finished, its kind is published in gotObject and mode goes
// back to kNone.
// Throws cms::Exception("InvalidState") if gotObject is still set when a
// block finishes.
// NOTE(review): the declarations of `type` and `header` used below are not
// visible in this listing (the embedded numbering skips 297 and 320).
279  void LHEReader::XMLHandler::endElement(const XMLCh *const uri,
280  const XMLCh *const localname,
281  const XMLCh *const qname) {
282  std::string name((const char *)XMLSimpleStr(qname));
283 
284  if (mode) {
// Closing a nested header element: drop the innermost entry only.
285  if (mode == kHeader && xmlNodes.size() > 1) {
286  xmlNodes.resize(xmlNodes.size() - 1);
287  return;
288  } else if (mode == kHeader) {
// Closing the <header> block itself: turn every child of the root into text.
289  std::unique_ptr<DOMLSSerializer> writer(impl->createLSSerializer());
290  std::unique_ptr<DOMLSOutput> outputDesc(impl->createLSOutput());
291  assert(outputDesc.get());
292  outputDesc->setEncoding(XMLUniStr("UTF-8"));
293 
294  for (DOMNode *node = xmlNodes[0]->getFirstChild(); node; node = node->getNextSibling()) {
295  XMLSimpleStr buffer(writer->writeToString(node));
296 
// [p, q) delimits the part of the serialized text handed to fillHeader.
298  const char *p, *q;
299  DOMElement *elem;
300 
301  switch (node->getNodeType()) {
302  case DOMNode::ELEMENT_NODE:
// Element: the tag name becomes the type; the content runs from just after
// the first '>' up to the last '<'.
303  elem = static_cast<DOMElement *>(node);
304  type = (const char *)XMLSimpleStr(elem->getTagName());
305  p = std::strchr((const char *)buffer, '>') + 1;
306  q = std::strrchr(p, '<');
307  break;
308  case DOMNode::COMMENT_NODE:
// Comment: empty type; skip 4 leading and 3 trailing characters of the
// serialized text (presumably the "<!--" and "-->" markers).
309  type = "";
310  p = buffer + 4;
311  q = buffer + strlen(buffer) - 3;
312  break;
313  default:
// Anything else: type "<>"; leading whitespace is skipped and nodes that
// contain nothing else are ignored.
314  type = "<>";
315  p = buffer + std::strspn(buffer, " \t\r\n");
316  if (!*p)
317  continue;
318  q = p + strlen(p);
319  }
321  fillHeader(header, p, q - p);
322  headers.push_back(header);
323  }
324 
325  xmlHeader->release();
326  xmlHeader = nullptr;
327  } else if (name == "event" && mode == kEvent &&
328  (skipEvent || (!xmlEventNodes.empty()))) { // handling of weights in LHE file
329 
// Skipped event: just report that an event was seen.
330  if (skipEvent) {
331  gotObject = mode;
332  mode = kNone;
333  return;
334  }
335 
// Walk the direct children of the event root.
336  for (DOMNode *node = xmlEventNodes[0]->getFirstChild(); node; node = node->getNextSibling()) {
337  switch (node->getNodeType()) {
338  case DOMNode::ELEMENT_NODE: // rwgt
// For each child of xmlEventNodes[1] that is an element, store the value of
// its first attribute together with the value of its first child node.
// NOTE(review): getAttributes()/getFirstChild() are dereferenced before the
// node type is checked, and xmlEventNodes[1] is used without a size check -
// confirm these cannot be null / out of range for the inputs handled here.
339  for (DOMNode *rwgt = xmlEventNodes[1]->getFirstChild(); rwgt; rwgt = rwgt->getNextSibling()) {
340  DOMNode *attr = rwgt->getAttributes()->item(0);
341  XMLSimpleStr atname(attr->getNodeValue());
342  XMLSimpleStr weight(rwgt->getFirstChild()->getNodeValue());
343  switch (rwgt->getNodeType()) {
344  case DOMNode::ELEMENT_NODE:
345  weightsinevent.push_back(std::make_pair((const char *)atname, (const char *)weight));
346  break;
347  default:
348  break;
349  }
350  }
351  break;
352  case DOMNode::TEXT_NODE: // event information
353  {
// Text directly below the event root is appended to the member buffer.
354  XMLSimpleStr data(node->getNodeValue());
355  buffer.append(data);
356  } break;
357  default:
358  break;
359  }
360  }
361  } else if (mode == kEvent) {
362  //skip unknown tags
363  return;
364  }
365 
// A finished block must not overwrite one that has not been picked up yet.
366  if (gotObject != kNone)
367  throw cms::Exception("InvalidState") << "Unexpected pileup in"
368  " LHEReader::XMLHandler::endElement"
369  << std::endl;
370 
371  gotObject = mode;
372  mode = kNone;
373  }
374  }
375 
376  void LHEReader::XMLHandler::characters(const XMLCh *const data_, const XMLSize_t length) {
377  if (mode == kHeader) {
378  DOMText *text = xmlHeader->createTextNode(data_);
379  xmlNodes.back()->appendChild(text);
380  return;
381  }
382 
383  if (XMLSimpleStr::isAllSpaces(data_, length))
384  return;
385 
386  unsigned int offset = 0;
387  while (offset < length && XMLSimpleStr::isSpace(data_[offset]))
388  offset++;
389 
390  if (mode == kEvent) {
391  if (!skipEvent) {
392  DOMText *text = xmlEvent->createTextNode(data_ + offset);
393  xmlEventNodes.back()->appendChild(text);
394  }
395  return;
396  }
397 
398  if (mode == kNone)
399  throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
400 
401  XMLSimpleStr data(data_ + offset);
402  buffer.append(data);
403  }
404 
// SAX callback for XML comments.
//
// data_:  text of the comment.
// length: unused in the visible code.
//
// Inside <header> the comment is added as a DOM comment node below the
// innermost open header element. Otherwise the text is transcoded into `data`
// and a `header` object is appended to `headers`.
// NOTE(review): the statements that declare and fill `header` are not visible
// in this listing (the embedded numbering skips 414-415).
405  void LHEReader::XMLHandler::comment(const XMLCh *const data_, const XMLSize_t length) {
406  if (mode == kHeader) {
407  DOMComment *comment = xmlHeader->createComment(data_);
408  xmlNodes.back()->appendChild(comment);
409  return;
410  }
411 
412  XMLSimpleStr data(data_);
413 
416  headers.push_back(header);
417  }
418 
420  : fileURLs(params.getUntrackedParameter<std::vector<std::string> >("fileNames")),
421  strName(""),
422  firstEvent(params.getUntrackedParameter<unsigned int>("skipEvents", 0)),
423  maxEvents(params.getUntrackedParameter<int>("limitEvents", -1)),
424  curIndex(0),
425  handler(new XMLHandler()) {}
426 
427  LHEReader::LHEReader(const std::vector<std::string> &fileNames, unsigned int firstEvent)
428  : fileURLs(fileNames),
429  strName(""),
431  maxEvents(-1),
432  curIndex(0),
433  handler(new XMLHandler()) {}
434 
436  : strName(inputs), firstEvent(firstEvent), maxEvents(-1), curIndex(0), handler(new XMLHandler()) {}
437 
439  // Explicitly release "orphaned" resources
440  // that were created through DOM implementation
441  // createXXXX factory method *before* last
442  // XMLPlatformUtils::Terminate is called.
443  handler.release();
444  curDoc.release();
445  curSource.release();
446  }
447 
// Returns the next event, opening input sources as needed.
//
// newFileOpened: optional output flag; set to true (never to false) each time
//                a new file from fileURLs is opened.
//
// Returns an empty pointer when the current document is exhausted, when
// maxEvents has reached 0, or when there is nothing left to read.
// Throws cms::Exception("InvalidState") if an event arrives before an <init>
// block produced the run information.
// NOTE(review): the embedded numbering skips 454 and 500, so one statement
// inside `if (!platform)` and one line before the stand-alone `break;` are not
// visible in this listing.
448  std::shared_ptr<LHEEvent> LHEReader::next(bool *newFileOpened) {
449  while (curDoc.get() || curIndex < fileURLs.size() || (fileURLs.empty() && !strName.empty())) {
// No open document: open the next file, or the inline string source.
450  if (!curDoc.get()) {
451  if (!platform) {
452  //If we read multiple files, the XercesPlatform must live longer than any one
453  // XMLDocument.
455  }
456  if (!fileURLs.empty()) {
457  logFileAction(" Initiating request to open LHE file ", fileURLs[curIndex]);
458  curSource = std::make_unique<FileSource>(fileURLs[curIndex]);
459  logFileAction(" Successfully opened LHE file ", fileURLs[curIndex]);
460  if (newFileOpened != nullptr)
461  *newFileOpened = true;
462  ++curIndex;
463  } else if (!strName.empty()) {
464  curSource = std::make_unique<StringSource>(strName);
465  }
// A new document starts with a clean handler and no run information.
466  handler->reset();
467  curDoc.reset(curSource->createReader(*handler));
468  curRunInfo.reset();
469  }
// While events remain to be skipped the handler does not build them.
470  handler->skipEvent = firstEvent > 0;
471 
// Take over (and clear) whatever the handler finished since the last pass.
472  XMLHandler::Object event = handler->gotObject;
473  handler->gotObject = XMLHandler::kNone;
474 
475  switch (event) {
476  case XMLHandler::kNone:
// Nothing pending: parse further; a false result ends the current document.
// NOTE(review): with the string source, fileURLs is empty and curIndex is
// never incremented, so fileURLs[curIndex - 1] indexes out of range here -
// confirm and guard.
477  if (!curDoc->parse()) {
478  curDoc.reset();
479  logFileAction(" Closed LHE file ", fileURLs[curIndex - 1]);
480  return std::shared_ptr<LHEEvent>();
481  }
482  break;
483 
484  case XMLHandler::kHeader:
485  break;
486 
487  case XMLHandler::kInit: {
// <init> finished: build the run information from the buffered text and
// attach all headers collected so far.
488  std::istringstream data;
489  data.str(handler->buffer);
490  handler->buffer.clear();
491 
492  curRunInfo.reset(new LHERunInfo(data));
493 
494  std::for_each(handler->headers.begin(),
495  handler->headers.end(),
496  std::bind(&LHERunInfo::addHeader, curRunInfo.get(), std::placeholders::_1));
497  handler->headers.clear();
498  } break;
499 
501  break;
502 
503  case XMLHandler::kEvent: {
504  if (!curRunInfo.get())
505  throw cms::Exception("InvalidState") << "Got LHE event without"
506  " initialization."
507  << std::endl;
508 
// Consume one of the events that were requested to be skipped.
509  if (firstEvent > 0) {
510  firstEvent--;
511  continue;
512  }
513 
// maxEvents == 0: limit reached; a negative value never takes either branch.
514  if (maxEvents == 0)
515  return std::shared_ptr<LHEEvent>();
516  else if (maxEvents > 0)
517  maxEvents--;
518 
519  std::istringstream data;
520  data.str(handler->buffer);
521  handler->buffer.clear();
522 
523  std::shared_ptr<LHEEvent> lheevent;
524  lheevent.reset(new LHEEvent(curRunInfo, data));
// Each collected weight is parsed as a double; -1.0 is kept when the text
// cannot be parsed.
525  const XMLHandler::wgt_info &info = handler->weightsinevent;
526  for (size_t i = 0; i < info.size(); ++i) {
527  double num = -1.0;
528  sscanf(info[i].second.c_str(), "%le", &num);
529  lheevent->addWeight(gen::WeightsInfo(info[i].first, num));
530  }
531  lheevent->setNpLO(handler->npLO);
532  lheevent->setNpNLO(handler->npNLO);
533  lheevent->setEvtNum(handler->evtnum);
// Reset the event number so it does not carry over to the next event.
534  handler->evtnum = -1;
535  //fill scales
536  if (!handler->scales.empty()) {
537  lheevent->setScales(handler->scales);
538  }
539  return lheevent;
540  }
541  }
542  }
543 
544  return std::shared_ptr<LHEEvent>();
545  }
546 
547 } // namespace lhef
std::shared_ptr< void > platform
Definition: LHEReader.h:42
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:62
static const TGPicture * info(bool iBackgroundIsBlack)
LHEReader(const edm::ParameterSet &params)
Definition: LHEReader.cc:419
static void logFileAction(char const *msg, std::string const &fileName)
Definition: LH5Reader.cc:31
static void fillHeader(LHERunInfo::Header &header, const char *data, int len=-1)
Definition: LHEReader.cc:156
std::unique_ptr< std::istream > fileStream
Definition: LHEReader.cc:83
void FlushMessageLog()
StringSource(const std::string &inputs)
Definition: LHEReader.cc:70
void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override
Definition: LHEReader.cc:279
std::vector< std::pair< std::string, std::string > > wgt_info
Definition: LHEReader.cc:88
const std::vector< std::string > fileURLs
Definition: LHEReader.h:29
static bool isSpace(XMLCh ch)
Definition: XMLUtils.h:88
void characters(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:376
Definition: weight.py:1
Log< level::Error, false > LogError
assert(be >=bs)
std::vector< LHERunInfo::Header > headers
Definition: LHEReader.cc:139
U second(std::pair< T, U > const &p)
ROOT::VecOps::RVec< UChar_t > chars
Definition: Resolutions.cc:6
void addHeader(const Header &header)
Definition: LHERunInfo.h:61
std::unique_ptr< StorageWrap > fileStream
Definition: LHEReader.cc:65
static const char *const kComment
static bool isAllSpaces(const XMLCh *str, unsigned int length)
Definition: XMLUtils.h:84
const std::string strName
Definition: LHEReader.h:32
Definition: value.py:1
static std::shared_ptr< void > platformHandle()
Definition: XMLUtils.h:49
std::unique_ptr< DOMImplementation > impl
Definition: LHEReader.cc:131
std::vector< DOMElement * > xmlNodes
Definition: LHEReader.cc:137
std::unique_ptr< XMLDocument > curDoc
Definition: LHEReader.h:39
void startElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname, const Attributes &attributes) override
Definition: LHEReader.cc:171
const wgt_info & weightInfo() const
Definition: LHEReader.cc:114
std::unique_ptr< Source > curSource
Definition: LHEReader.h:38
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:80
tuple msg
Definition: mps_check.py:286
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:80
void comment(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:405
virtual XMLDocument * createReader(XMLDocument::Handler &handler)=0
unsigned int firstEvent
Definition: LHEReader.h:33
std::shared_ptr< LHERunInfo > curRunInfo
Definition: LHEReader.h:40
Log< level::System, true > LogAbsolute
std::vector< float > scales
Definition: LHEReader.cc:143
std::unique_ptr< XMLHandler > handler
Definition: LHEReader.h:41
std::shared_ptr< LHEEvent > next(bool *newFileOpened=nullptr)
Definition: LHEReader.cc:448
#define get
FileSource(const std::string &fileURL)
Definition: LHEReader.cc:49
static void attributesToDom(DOMElement *dom, const Attributes &attributes)
Definition: LHEReader.cc:147
unsigned int curIndex
Definition: LHEReader.h:35
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1