CMS 3D CMS Logo

LHEReader.cc
Go to the documentation of this file.
1 #include <algorithm>
2 #include <iomanip>
3 #include <iostream>
4 #include <memory>
5 
6 #include <cstdio>
7 #include <cstring>
8 #include <fstream>
9 #include <sstream>
10 #include <string>
11 #include <vector>
12 
13 #include <xercesc/sax2/Attributes.hpp>
14 #include <xercesc/dom/DOM.hpp>
15 
20 
24 
28 
29 #include "XMLUtils.h"
30 
32 
33 namespace lhef {
34 
35  static void logFileAction(char const *msg, std::string const &fileName) {
36  edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay() << msg << fileName;
38  }
39 
41  public:
42  Source() {}
43  virtual ~Source() {}
45  };
46 
48  public:
49  FileSource(const std::string &fileURL) {
50  using namespace edm::storage;
51  auto storage = StorageFactory::get()->open(fileURL, IOFlags::OpenRead);
52 
53  if (!storage)
54  throw cms::Exception("FileOpenError")
55  << "Could not open LHE file \"" << fileURL << "\" for reading" << std::endl;
56 
57  fileStream = std::make_unique<StorageWrap>(std::move(storage));
58  }
59 
60  ~FileSource() override {}
61 
63 
64  private:
65  std::unique_ptr<StorageWrap> fileStream;
66  };
67 
69  public:
71  if (inputs.empty())
72  throw cms::Exception("StreamOpenError") << "Empty LHE file string name \"" << std::endl;
73 
74  std::stringstream *tmpis = new std::stringstream(inputs);
75  fileStream.reset(tmpis);
76  }
77 
78  ~StringSource() override {}
79 
81 
82  private:
83  std::unique_ptr<std::istream> fileStream;
84  };
85 
87  public:
88  typedef std::vector<std::pair<std::string, std::string> > wgt_info;
90  : impl(nullptr),
91  gotObject(kNone),
92  mode(kNone),
93  xmlHeader(nullptr),
94  xmlEvent(nullptr),
95  headerOk(false),
96  npLO(-99),
97  npNLO(-99) {}
98  ~XMLHandler() override {
99  if (xmlHeader)
100  xmlHeader->release();
101  if (xmlEvent)
102  xmlEvent->release();
103  }
104 
105  enum Object { kNone = 0, kHeader, kInit, kComment, kEvent };
106 
107  void reset() {
108  headerOk = false;
109  weightsinevent.clear();
110  gotObject = kNone;
111  mode = kNone;
112  }
113 
114  const wgt_info &weightInfo() const { return weightsinevent; }
115 
116  protected:
117  void startElement(const XMLCh *const uri,
118  const XMLCh *const localname,
119  const XMLCh *const qname,
120  const Attributes &attributes) override;
121 
122  void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override;
123 
124  void characters(const XMLCh *const chars, const XMLSize_t length) override;
125  void comment(const XMLCh *const chars, const XMLSize_t length) override;
126 
127  private:
128  friend class LHEReader;
129 
130  bool skipEvent = false;
131  std::unique_ptr<DOMImplementation> impl;
135  DOMDocument *xmlHeader;
136  DOMDocument *xmlEvent;
137  std::vector<DOMElement *> xmlNodes, xmlEventNodes;
138  bool headerOk;
139  std::vector<LHERunInfo::Header> headers;
141  int npLO;
142  int npNLO;
143  std::vector<float> scales;
144  };
145 
146  static void attributesToDom(DOMElement *dom, const Attributes &attributes) {
147  for (unsigned int i = 0; i < attributes.getLength(); i++) {
148  const XMLCh *name = attributes.getQName(i);
149  const XMLCh *value = attributes.getValue(i);
150 
151  dom->setAttribute(name, value);
152  }
153  }
154 
155  static void fillHeader(LHERunInfo::Header &header, const char *data, int len = -1) {
156  const char *end = len >= 0 ? (data + len) : nullptr;
157  while (*data && (!end || data < end)) {
158  std::size_t len = std::strcspn(data, "\r\n");
159  if (end && data + len > end)
160  len = end - data;
161  if (data[len] == '\r' && data[len + 1] == '\n')
162  len += 2;
163  else if (data[len])
164  len++;
165  header.addLine(std::string(data, len));
166  data += len;
167  }
168  }
169 
170  void LHEReader::XMLHandler::startElement(const XMLCh *const uri,
171  const XMLCh *const localname,
172  const XMLCh *const qname,
173  const Attributes &attributes) {
174  std::string name((const char *)XMLSimpleStr(qname));
175 
176  if (!headerOk) {
177  if (name != "LesHouchesEvents")
178  throw cms::Exception("InvalidFormat") << "LHE file has invalid header" << std::endl;
179  headerOk = true;
180  return;
181  }
182 
183  if (mode == kHeader) {
184  DOMElement *elem = xmlHeader->createElement(qname);
185  attributesToDom(elem, attributes);
186  xmlNodes.back()->appendChild(elem);
187  xmlNodes.push_back(elem);
188  return;
189  } else if (mode == kEvent) {
190  if (skipEvent) {
191  return;
192  }
193 
194  DOMElement *elem = xmlEvent->createElement(qname);
195  attributesToDom(elem, attributes);
196 
197  //TODO this is a hack (even more than the rest of this class)
198  if (name == "rwgt") {
199  xmlEventNodes[0]->appendChild(elem);
200  } else if (name == "wgt") {
201  xmlEventNodes[1]->appendChild(elem);
202  } else if (name == "scales") {
203  for (XMLSize_t iscale = 0; iscale < attributes.getLength(); ++iscale) {
204  int ipart = 0;
205  const char *scalename = XMLSimpleStr(attributes.getQName(iscale));
206  int nmatch = sscanf(scalename, "pt_clust_%d", &ipart);
207 
208  if (nmatch != 1) {
209  edm::LogError("Generator|LHEInterface") << "invalid attribute in <scales> tag" << std::endl;
210  }
211 
212  float scaleval;
213  const char *scalevalstr = XMLSimpleStr(attributes.getValue(iscale));
214  sscanf(scalevalstr, "%e", &scaleval);
215 
216  scales.push_back(scaleval);
217  }
218  }
219  xmlEventNodes.push_back(elem);
220  return;
221  } else if (mode == kInit) {
222  //skip unknown tags in init block as well
223  return;
224  } else if (mode != kNone) {
225  throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
226  }
227 
228  if (name == "header") {
229  if (!impl)
230  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
231 
232  xmlHeader = impl->createDocument(nullptr, qname, nullptr);
233  xmlNodes.resize(1);
234  xmlNodes[0] = xmlHeader->getDocumentElement();
235  mode = kHeader;
236  }
237  if (name == "init") {
238  mode = kInit;
239  } else if (name == "event") {
240  if (!skipEvent) {
241  if (!impl)
242  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
243 
244  if (xmlEvent)
245  xmlEvent->release();
246  xmlEvent = impl->createDocument(nullptr, qname, nullptr);
247  weightsinevent.resize(0);
248  scales.clear();
249 
250  npLO = -99;
251  npNLO = -99;
252  const XMLCh *npLOval = attributes.getValue(XMLString::transcode("npLO"));
253  if (npLOval) {
254  const char *npLOs = XMLSimpleStr(npLOval);
255  sscanf(npLOs, "%d", &npLO);
256  }
257  const XMLCh *npNLOval = attributes.getValue(XMLString::transcode("npNLO"));
258  if (npNLOval) {
259  const char *npNLOs = XMLSimpleStr(npNLOval);
260  sscanf(npNLOs, "%d", &npNLO);
261  }
262 
263  xmlEventNodes.resize(1);
264  xmlEventNodes[0] = xmlEvent->getDocumentElement();
265  }
266  mode = kEvent;
267  }
268 
269  if (mode == kNone)
270  throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
271 
272  buffer.clear();
273  }
274 
275  void LHEReader::XMLHandler::endElement(const XMLCh *const uri,
276  const XMLCh *const localname,
277  const XMLCh *const qname) {
278  std::string name((const char *)XMLSimpleStr(qname));
279 
280  if (mode) {
281  if (mode == kHeader && xmlNodes.size() > 1) {
282  xmlNodes.resize(xmlNodes.size() - 1);
283  return;
284  } else if (mode == kHeader) {
285  std::unique_ptr<DOMLSSerializer> writer(impl->createLSSerializer());
286  std::unique_ptr<DOMLSOutput> outputDesc(impl->createLSOutput());
287  assert(outputDesc.get());
288  outputDesc->setEncoding(XMLUniStr("UTF-8"));
289 
290  for (DOMNode *node = xmlNodes[0]->getFirstChild(); node; node = node->getNextSibling()) {
291  XMLSimpleStr buffer(writer->writeToString(node));
292 
294  const char *p, *q;
295  DOMElement *elem;
296 
297  switch (node->getNodeType()) {
298  case DOMNode::ELEMENT_NODE:
299  elem = static_cast<DOMElement *>(node);
300  type = (const char *)XMLSimpleStr(elem->getTagName());
301  p = std::strchr((const char *)buffer, '>') + 1;
302  q = std::strrchr(p, '<');
303  break;
304  case DOMNode::COMMENT_NODE:
305  type = "";
306  p = buffer + 4;
307  q = buffer + strlen(buffer) - 3;
308  break;
309  default:
310  type = "<>";
311  p = buffer + std::strspn(buffer, " \t\r\n");
312  if (!*p)
313  continue;
314  q = p + strlen(p);
315  }
317  fillHeader(header, p, q - p);
318  headers.push_back(header);
319  }
320 
321  xmlHeader->release();
322  xmlHeader = nullptr;
323  } else if (name == "event" && mode == kEvent &&
324  (skipEvent || (!xmlEventNodes.empty()))) { // handling of weights in LHE file
325 
326  if (skipEvent) {
327  gotObject = mode;
328  mode = kNone;
329  return;
330  }
331 
332  for (DOMNode *node = xmlEventNodes[0]->getFirstChild(); node; node = node->getNextSibling()) {
333  switch (node->getNodeType()) {
334  case DOMNode::ELEMENT_NODE: // rwgt
335  for (DOMNode *rwgt = xmlEventNodes[1]->getFirstChild(); rwgt; rwgt = rwgt->getNextSibling()) {
336  DOMNode *attr = rwgt->getAttributes()->item(0);
337  XMLSimpleStr atname(attr->getNodeValue());
338  XMLSimpleStr weight(rwgt->getFirstChild()->getNodeValue());
339  switch (rwgt->getNodeType()) {
340  case DOMNode::ELEMENT_NODE:
341  weightsinevent.push_back(std::make_pair((const char *)atname, (const char *)weight));
342  break;
343  default:
344  break;
345  }
346  }
347  break;
348  case DOMNode::TEXT_NODE: // event information
349  {
350  XMLSimpleStr data(node->getNodeValue());
351  buffer.append(data);
352  } break;
353  default:
354  break;
355  }
356  }
357  } else if (mode == kEvent) {
358  //skip unknown tags
359  return;
360  }
361 
362  if (gotObject != kNone)
363  throw cms::Exception("InvalidState") << "Unexpected pileup in"
364  " LHEReader::XMLHandler::endElement"
365  << std::endl;
366 
367  gotObject = mode;
368  mode = kNone;
369  }
370  }
371 
372  void LHEReader::XMLHandler::characters(const XMLCh *const data_, const XMLSize_t length) {
373  if (mode == kHeader) {
374  DOMText *text = xmlHeader->createTextNode(data_);
375  xmlNodes.back()->appendChild(text);
376  return;
377  }
378 
379  if (XMLSimpleStr::isAllSpaces(data_, length))
380  return;
381 
382  unsigned int offset = 0;
383  while (offset < length && XMLSimpleStr::isSpace(data_[offset]))
384  offset++;
385 
386  if (mode == kEvent) {
387  if (!skipEvent) {
388  DOMText *text = xmlEvent->createTextNode(data_ + offset);
389  xmlEventNodes.back()->appendChild(text);
390  }
391  return;
392  }
393 
394  if (mode == kNone)
395  throw cms::Exception("InvalidFormat") << "LHE file has invalid format" << std::endl;
396 
397  XMLSimpleStr data(data_ + offset);
398  buffer.append(data);
399  }
400 
401  void LHEReader::XMLHandler::comment(const XMLCh *const data_, const XMLSize_t length) {
402  if (mode == kHeader) {
403  DOMComment *comment = xmlHeader->createComment(data_);
404  xmlNodes.back()->appendChild(comment);
405  return;
406  }
407 
408  XMLSimpleStr data(data_);
409 
412  headers.push_back(header);
413  }
414 
416  : fileURLs(params.getUntrackedParameter<std::vector<std::string> >("fileNames")),
417  strName(""),
418  firstEvent(params.getUntrackedParameter<unsigned int>("skipEvents", 0)),
419  maxEvents(params.getUntrackedParameter<int>("limitEvents", -1)),
420  curIndex(0),
421  handler(new XMLHandler()) {}
422 
423  LHEReader::LHEReader(const std::vector<std::string> &fileNames, unsigned int firstEvent)
424  : fileURLs(fileNames),
425  strName(""),
427  maxEvents(-1),
428  curIndex(0),
429  handler(new XMLHandler()) {}
430 
432  : strName(inputs), firstEvent(firstEvent), maxEvents(-1), curIndex(0), handler(new XMLHandler()) {}
433 
435  // Explicitly release "orphaned" resources
436  // that were created through DOM implementation
437  // createXXXX factory method *before* last
438  // XMLPlatformUtils::Terminate is called.
439  handler.release();
440  curDoc.release();
441  curSource.release();
442  }
443 
444  std::shared_ptr<LHEEvent> LHEReader::next(bool *newFileOpened) {
445  while (curDoc.get() || curIndex < fileURLs.size() || (fileURLs.empty() && !strName.empty())) {
446  if (!curDoc.get()) {
447  if (!platform) {
448  //If we read multiple files, the XercesPlatform must live longer than any one
449  // XMLDocument.
451  }
452  if (!fileURLs.empty()) {
453  logFileAction(" Initiating request to open LHE file ", fileURLs[curIndex]);
454  curSource = std::make_unique<FileSource>(fileURLs[curIndex]);
455  logFileAction(" Successfully opened LHE file ", fileURLs[curIndex]);
456  if (newFileOpened != nullptr)
457  *newFileOpened = true;
458  ++curIndex;
459  } else if (!strName.empty()) {
460  curSource = std::make_unique<StringSource>(strName);
461  }
462  handler->reset();
463  curDoc.reset(curSource->createReader(*handler));
464  curRunInfo.reset();
465  }
466  handler->skipEvent = firstEvent > 0;
467 
468  XMLHandler::Object event = handler->gotObject;
469  handler->gotObject = XMLHandler::kNone;
470 
471  switch (event) {
472  case XMLHandler::kNone:
473  if (!curDoc->parse()) {
474  curDoc.reset();
475  logFileAction(" Closed LHE file ", fileURLs[curIndex - 1]);
476  return std::shared_ptr<LHEEvent>();
477  }
478  break;
479 
480  case XMLHandler::kHeader:
481  break;
482 
483  case XMLHandler::kInit: {
484  std::istringstream data;
485  data.str(handler->buffer);
486  handler->buffer.clear();
487 
488  curRunInfo.reset(new LHERunInfo(data));
489 
490  std::for_each(handler->headers.begin(),
491  handler->headers.end(),
492  std::bind(&LHERunInfo::addHeader, curRunInfo.get(), std::placeholders::_1));
493  handler->headers.clear();
494  } break;
495 
497  break;
498 
499  case XMLHandler::kEvent: {
500  if (!curRunInfo.get())
501  throw cms::Exception("InvalidState") << "Got LHE event without"
502  " initialization."
503  << std::endl;
504 
505  if (firstEvent > 0) {
506  firstEvent--;
507  continue;
508  }
509 
510  if (maxEvents == 0)
511  return std::shared_ptr<LHEEvent>();
512  else if (maxEvents > 0)
513  maxEvents--;
514 
515  std::istringstream data;
516  data.str(handler->buffer);
517  handler->buffer.clear();
518 
519  std::shared_ptr<LHEEvent> lheevent;
520  lheevent.reset(new LHEEvent(curRunInfo, data));
521  const XMLHandler::wgt_info &info = handler->weightsinevent;
522  for (size_t i = 0; i < info.size(); ++i) {
523  double num = -1.0;
524  sscanf(info[i].second.c_str(), "%le", &num);
525  lheevent->addWeight(gen::WeightsInfo(info[i].first, num));
526  }
527  lheevent->setNpLO(handler->npLO);
528  lheevent->setNpNLO(handler->npNLO);
529  //fill scales
530  if (!handler->scales.empty()) {
531  lheevent->setScales(handler->scales);
532  }
533  return lheevent;
534  }
535  }
536  }
537 
538  return std::shared_ptr<LHEEvent>();
539  }
540 
541 } // namespace lhef
std::shared_ptr< void > platform
Definition: LHEReader.h:42
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:62
static const TGPicture * info(bool iBackgroundIsBlack)
LHEReader(const edm::ParameterSet &params)
Definition: LHEReader.cc:415
static void logFileAction(char const *msg, std::string const &fileName)
Definition: LH5Reader.cc:31
static void fillHeader(LHERunInfo::Header &header, const char *data, int len=-1)
Definition: LHEReader.cc:155
std::unique_ptr< std::istream > fileStream
Definition: LHEReader.cc:83
void FlushMessageLog()
StringSource(const std::string &inputs)
Definition: LHEReader.cc:70
void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override
Definition: LHEReader.cc:275
std::vector< std::pair< std::string, std::string > > wgt_info
Definition: LHEReader.cc:88
const std::vector< std::string > fileURLs
Definition: LHEReader.h:29
static bool isSpace(XMLCh ch)
Definition: XMLUtils.h:88
void characters(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:372
Definition: weight.py:1
Log< level::Error, false > LogError
assert(be >=bs)
std::vector< LHERunInfo::Header > headers
Definition: LHEReader.cc:139
U second(std::pair< T, U > const &p)
void addHeader(const Header &header)
Definition: LHERunInfo.h:61
std::unique_ptr< StorageWrap > fileStream
Definition: LHEReader.cc:65
static const char *const kComment
static bool isAllSpaces(const XMLCh *str, unsigned int length)
Definition: XMLUtils.h:84
const std::string strName
Definition: LHEReader.h:32
Definition: value.py:1
static std::shared_ptr< void > platformHandle()
Definition: XMLUtils.h:49
std::unique_ptr< DOMImplementation > impl
Definition: LHEReader.cc:131
std::vector< DOMElement * > xmlNodes
Definition: LHEReader.cc:137
std::unique_ptr< XMLDocument > curDoc
Definition: LHEReader.h:39
void startElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname, const Attributes &attributes) override
Definition: LHEReader.cc:170
const wgt_info & weightInfo() const
Definition: LHEReader.cc:114
std::unique_ptr< Source > curSource
Definition: LHEReader.h:38
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:80
tuple msg
Definition: mps_check.py:285
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:79
void comment(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:401
virtual XMLDocument * createReader(XMLDocument::Handler &handler)=0
unsigned int firstEvent
Definition: LHEReader.h:33
std::shared_ptr< LHERunInfo > curRunInfo
Definition: LHEReader.h:40
Log< level::System, true > LogAbsolute
std::vector< float > scales
Definition: LHEReader.cc:143
std::unique_ptr< XMLHandler > handler
Definition: LHEReader.h:41
std::shared_ptr< LHEEvent > next(bool *newFileOpened=nullptr)
Definition: LHEReader.cc:444
#define get
FileSource(const std::string &fileURL)
Definition: LHEReader.cc:49
static void attributesToDom(DOMElement *dom, const Attributes &attributes)
Definition: LHEReader.cc:146
unsigned int curIndex
Definition: LHEReader.h:35
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1