CMS 3D CMS Logo

LHEReader.cc
Go to the documentation of this file.
1 #include <algorithm>
2 #include <iomanip>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <cstring>
7 #include <string>
8 #include <vector>
9 #include <cstdio>
10 
11 #include <boost/bind.hpp>
12 
13 #include <xercesc/sax2/Attributes.hpp>
14 #include <xercesc/dom/DOM.hpp>
15 
20 
24 
28 
29 #include "XMLUtils.h"
30 
31 #include "boost/lexical_cast.hpp"
32 
34 
35 namespace lhef {
36 
37  static void logFileAction(char const* msg, std::string const& fileName) {
38  edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay() << msg << fileName;
40  }
41 
42 
44  public:
45  Source() {}
46  virtual ~Source() {}
48 };
49 
51  public:
52  FileSource(const std::string &fileURL)
53  {
54  auto storage =
55  StorageFactory::get()->open(fileURL,
57 
58  if (!storage)
59  throw cms::Exception("FileOpenError")
60  << "Could not open LHE file \""
61  << fileURL << "\" for reading"
62  << std::endl;
63 
64  fileStream.reset(new StorageWrap(std::move(storage)));
65  }
66 
67  ~FileSource() override {}
68 
70  { return new XMLDocument(fileStream, handler); }
71 
72  private:
73  std::unique_ptr<StorageWrap> fileStream;
74 };
75 
77  public:
79  {
80  if (inputs.empty())
81  throw cms::Exception("StreamOpenError")
82  << "Empty LHE file string name \""
83  << std::endl;
84 
85  std::stringstream * tmpis = new std::stringstream(inputs);
86  fileStream.reset(tmpis);
87  }
88 
89  ~StringSource() override {}
90 
92  { return new XMLDocument(fileStream, handler); }
93 
94  private:
95  std::unique_ptr<std::istream> fileStream;
96 };
97 
99  public:
100  typedef std::vector<std::pair<std::string,std::string> > wgt_info;
102  impl(nullptr),
103  gotObject(kNone), mode(kNone),
104  xmlHeader(nullptr), xmlEvent(nullptr), headerOk(false), npLO(-99), npNLO(-99) {}
105  ~XMLHandler() override
106  { if (xmlHeader) xmlHeader->release();
107  if (xmlEvent) xmlEvent->release(); }
108 
109  enum Object {
110  kNone = 0,
114  kEvent
115  };
116 
117  void reset() {
118  headerOk = false;
119  weightsinevent.clear();
120  gotObject = kNone;
121  mode = kNone;
122  }
123 
124  const wgt_info& weightInfo() const {return weightsinevent;}
125 
126  protected:
127  void startElement(const XMLCh *const uri,
128  const XMLCh *const localname,
129  const XMLCh *const qname,
130  const Attributes &attributes) override;
131 
132  void endElement(const XMLCh *const uri,
133  const XMLCh *const localname,
134  const XMLCh *const qname) override;
135 
136  void characters (const XMLCh *const chars, const XMLSize_t length) override;
137  void comment (const XMLCh *const chars, const XMLSize_t length) override;
138 
139  private:
140  friend class LHEReader;
141 
142  bool skipEvent = false;
143  std::unique_ptr<DOMImplementation> impl;
147  DOMDocument *xmlHeader;
148  DOMDocument *xmlEvent;
149  std::vector<DOMElement*> xmlNodes,xmlEventNodes;
150  bool headerOk;
151  std::vector<LHERunInfo::Header> headers;
152  wgt_info weightsinevent;
153  int npLO;
154  int npNLO;
155  std::vector<float> scales;
156  int evtnum=-1;
157 };
158 
159 static void attributesToDom(DOMElement *dom, const Attributes &attributes)
160 {
161  for(unsigned int i = 0; i < attributes.getLength(); i++) {
162  const XMLCh *name = attributes.getQName(i);
163  const XMLCh *value = attributes.getValue(i);
164 
165  dom->setAttribute(name, value);
166  }
167 }
168 
169 static void fillHeader(LHERunInfo::Header &header, const char *data,
170  int len = -1)
171 {
172  const char *end = len >= 0 ? (data + len) : nullptr;
173  while(*data && (!end || data < end)) {
174  std::size_t len = std::strcspn(data, "\r\n");
175  if (end && data + len > end)
176  len = end - data;
177  if (data[len] == '\r' && data[len + 1] == '\n')
178  len += 2;
179  else if (data[len])
180  len++;
181  header.addLine(std::string(data, len));
182  data += len;
183  }
184 }
185 
186 void LHEReader::XMLHandler::startElement(const XMLCh *const uri,
187  const XMLCh *const localname,
188  const XMLCh *const qname,
189  const Attributes &attributes)
190 {
191  std::string name((const char*)XMLSimpleStr(qname));
192 
193  if (!headerOk) {
194  if (name != "LesHouchesEvents")
195  throw cms::Exception("InvalidFormat")
196  << "LHE file has invalid header" << std::endl;
197  headerOk = true;
198  return;
199  }
200 
201  if (mode == kHeader) {
202  DOMElement *elem = xmlHeader->createElement(qname);
203  attributesToDom(elem, attributes);
204  xmlNodes.back()->appendChild(elem);
205  xmlNodes.push_back(elem);
206  return;
207  } else if ( mode == kEvent ) {
208 
209  if (skipEvent) {return;}
210 
211  DOMElement *elem = xmlEvent->createElement(qname);
212  attributesToDom(elem, attributes);
213 
214  //TODO this is a hack (even more than the rest of this class)
215  if( name == "rwgt" ) {
216  xmlEventNodes[0]->appendChild(elem);
217  } else if (name == "wgt") {
218  xmlEventNodes[1]->appendChild(elem);
219  }
220  else if (name == "scales") {
221  for (XMLSize_t iscale=0; iscale<attributes.getLength(); ++iscale) {
222  int ipart = 0;
223  const char *scalename = XMLSimpleStr(attributes.getQName(iscale));
224  int nmatch = sscanf(scalename,"pt_clust_%d",&ipart);
225 
226  if (nmatch!=1) {
227  edm::LogError("Generator|LHEInterface")
228  << "invalid attribute in <scales> tag"
229  << std::endl;
230  }
231 
232  float scaleval;
233  const char *scalevalstr = XMLSimpleStr(attributes.getValue(iscale));
234  sscanf(scalevalstr,"%e",&scaleval);
235 
236  scales.push_back(scaleval);
237  }
238  } else if( name == "event_num" ) {
239  const char *evtnumstr = XMLSimpleStr(attributes.getValue(XMLString::transcode("num")));
240  sscanf(evtnumstr,"%d",&evtnum);
241  }
242 
243  xmlEventNodes.push_back(elem);
244  return;
245  } else if (mode == kInit) {
246  //skip unknown tags in init block as well
247  return;
248  } else if (mode != kNone) {
249  throw cms::Exception("InvalidFormat")
250  << "LHE file has invalid format" << std::endl;
251  }
252 
253  if (name == "header") {
254  if (!impl)
255  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
256 
257  xmlHeader = impl->createDocument(nullptr, qname, nullptr);
258  xmlNodes.resize(1);
259  xmlNodes[0] = xmlHeader->getDocumentElement();
260  mode = kHeader;
261  } if (name == "init") {
262  mode = kInit;
263  } else if (name == "event") {
264  if (!skipEvent)
265  {
266  if (!impl)
267  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
268 
269  if(xmlEvent) xmlEvent->release();
270  xmlEvent = impl->createDocument(nullptr, qname, nullptr);
271  weightsinevent.resize(0);
272  scales.clear();
273 
274  npLO = -99;
275  npNLO = -99;
276  const XMLCh *npLOval = attributes.getValue(XMLString::transcode("npLO"));
277  if (npLOval) {
278  const char *npLOs = XMLSimpleStr(npLOval);
279  sscanf(npLOs,"%d",&npLO);
280  }
281  const XMLCh *npNLOval = attributes.getValue(XMLString::transcode("npNLO"));
282  if (npNLOval) {
283  const char *npNLOs = XMLSimpleStr(npNLOval);
284  sscanf(npNLOs,"%d",&npNLO);
285  }
286 
287  xmlEventNodes.resize(1);
288  xmlEventNodes[0] = xmlEvent->getDocumentElement();
289  }
290  mode = kEvent;
291  }
292 
293  if (mode == kNone)
294  throw cms::Exception("InvalidFormat")
295  << "LHE file has invalid format" << std::endl;
296 
297  buffer.clear();
298 }
299 
300 void LHEReader::XMLHandler::endElement(const XMLCh *const uri,
301  const XMLCh *const localname,
302  const XMLCh *const qname)
303 {
304  std::string name((const char*)XMLSimpleStr(qname));
305 
306  if (mode) {
307 
308  if (mode == kHeader && xmlNodes.size() > 1) {
309  xmlNodes.resize(xmlNodes.size() - 1);
310  return;
311  } else if (mode == kHeader) {
312  std::unique_ptr<DOMLSSerializer> writer(impl->createLSSerializer());
313  std::unique_ptr<DOMLSOutput> outputDesc(impl->createLSOutput());
314  assert(outputDesc.get());
315  outputDesc->setEncoding(XMLUniStr("UTF-8"));
316 
317  for(DOMNode *node = xmlNodes[0]->getFirstChild();
318  node; node = node->getNextSibling()) {
319  XMLSimpleStr buffer(writer->writeToString(node));
320 
322  const char *p, *q;
323  DOMElement *elem;
324 
325  switch(node->getNodeType()) {
326  case DOMNode::ELEMENT_NODE:
327  elem = static_cast<DOMElement*>(node);
328  type = (const char*)XMLSimpleStr(
329  elem->getTagName());
330  p = std::strchr((const char*)buffer,
331  '>') + 1;
332  q = std::strrchr(p, '<');
333  break;
334  case DOMNode::COMMENT_NODE:
335  type = "";
336  p = buffer + 4;
337  q = buffer + strlen(buffer) - 3;
338  break;
339  default:
340  type = "<>";
341  p = buffer +
342  std::strspn(buffer, " \t\r\n");
343  if (!*p)
344  continue;
345  q = p + strlen(p);
346  }
348  fillHeader(header, p, q - p);
349  headers.push_back(header);
350  }
351 
352  xmlHeader->release();
353  xmlHeader = nullptr;
354  }
355  else if (name == "event" &&
356  mode == kEvent &&
357  (skipEvent || (!xmlEventNodes.empty()))) { // handling of weights in LHE file
358 
359  if (skipEvent)
360  {
361  gotObject = mode;
362  mode = kNone;
363  return;
364  }
365 
366  for(DOMNode *node = xmlEventNodes[0]->getFirstChild();
367  node; node = node->getNextSibling()) {
368  switch( node->getNodeType() ) {
369  case DOMNode::ELEMENT_NODE: // rwgt
370  for(DOMNode *rwgt = xmlEventNodes[1]->getFirstChild();
371  rwgt; rwgt = rwgt->getNextSibling()) {
372  DOMNode* attr = rwgt->getAttributes()->item(0);
373  XMLSimpleStr atname(attr->getNodeValue());
374  XMLSimpleStr weight(rwgt->getFirstChild()->getNodeValue());
375  switch( rwgt->getNodeType() ) {
376  case DOMNode::ELEMENT_NODE:
377  weightsinevent.push_back(std::make_pair((const char*)atname,
378  (const char*)weight));
379  break;
380  default:
381  break;
382  }
383  }
384  break;
385  case DOMNode::TEXT_NODE: // event information
386  {
387  XMLSimpleStr data(node->getNodeValue());
388  buffer.append(data);
389  }
390  break;
391  default:
392  break;
393  }
394  }
395  }
396  else if (mode == kEvent) {
397  //skip unknown tags
398  return;
399  }
400 
401  if (gotObject != kNone)
402  throw cms::Exception("InvalidState")
403  << "Unexpected pileup in"
404  " LHEReader::XMLHandler::endElement"
405  << std::endl;
406 
407  gotObject = mode;
408  mode = kNone;
409  }
410 }
411 
412 void LHEReader::XMLHandler::characters(const XMLCh *const data_,
413  const XMLSize_t length)
414 {
415  if (mode == kHeader) {
416  DOMText *text = xmlHeader->createTextNode(data_);
417  xmlNodes.back()->appendChild(text);
418  return;
419  }
420 
421  if (XMLSimpleStr::isAllSpaces(data_, length))
422  return;
423 
424  unsigned int offset = 0;
425  while(offset < length && XMLSimpleStr::isSpace(data_[offset]))
426  offset++;
427 
428  if( mode == kEvent ) {
429  if (!skipEvent)
430  {
431  DOMText *text = xmlEvent->createTextNode(data_+offset);
432  xmlEventNodes.back()->appendChild(text);
433  }
434  return;
435  }
436 
437  if (mode == kNone)
438  throw cms::Exception("InvalidFormat")
439  << "LHE file has invalid format" << std::endl;
440 
441  XMLSimpleStr data(data_ + offset);
442  buffer.append(data);
443 }
444 
445 void LHEReader::XMLHandler::comment(const XMLCh *const data_,
446  const XMLSize_t length)
447 {
448  if (mode == kHeader) {
449  DOMComment *comment = xmlHeader->createComment(data_);
450  xmlNodes.back()->appendChild(comment);
451  return;
452  }
453 
454  XMLSimpleStr data(data_);
455 
457  fillHeader(header, data);
458  headers.push_back(header);
459 }
460 
462  fileURLs(params.getUntrackedParameter< std::vector<std::string> >("fileNames")),
463  strName(""),
464  firstEvent(params.getUntrackedParameter<unsigned int>("skipEvents", 0)),
465  maxEvents(params.getUntrackedParameter<int>("limitEvents", -1)),
466  curIndex(0), handler(new XMLHandler())
467 {
468 }
469 
470 LHEReader::LHEReader(const std::vector<std::string> &fileNames,
471  unsigned int firstEvent) :
472  fileURLs(fileNames), strName(""), firstEvent(firstEvent), maxEvents(-1),
473  curIndex(0), handler(new XMLHandler())
474 {
475 }
476 
478  unsigned int firstEvent) :
479  strName(inputs), firstEvent(firstEvent), maxEvents(-1),
480  curIndex(0), handler(new XMLHandler())
481 {
482 }
483 
485 {
486  // Explicitly release "orphaned" resources
487  // that were created through DOM implementation
488  // createXXXX factory method *before* last
489  // XMLPlatformUtils::Terminate is called.
490  handler.release();
491  curDoc.release();
492  curSource.release();
493 }
494 
495  std::shared_ptr<LHEEvent> LHEReader::next(bool* newFileOpened)
496  {
497  while(curDoc.get() || curIndex < fileURLs.size() || (fileURLs.empty() && !strName.empty() ) ) {
498  if (!curDoc.get()) {
499  if(!platform) {
500  //If we read multiple files, the XercesPlatform must live longer than any one
501  // XMLDocument.
503  }
504  if ( !fileURLs.empty() ) {
505  logFileAction(" Initiating request to open LHE file ", fileURLs[curIndex]);
506  curSource.reset(new FileSource(fileURLs[curIndex]));
507  logFileAction(" Successfully opened LHE file ", fileURLs[curIndex]);
508  if ( newFileOpened != nullptr ) *newFileOpened = true;
509  ++curIndex;
510  } else if ( !strName.empty() ) {
511  curSource.reset(new StringSource(strName));
512  }
513  handler->reset();
514  curDoc.reset(curSource->createReader(*handler));
515  curRunInfo.reset();
516  }
517  handler->skipEvent = firstEvent > 0;
518 
519  XMLHandler::Object event = handler->gotObject;
520  handler->gotObject = XMLHandler::kNone;
521 
522 
523  switch(event) {
524  case XMLHandler::kNone:
525  if (!curDoc->parse()) {
526  curDoc.reset();
527  logFileAction(" Closed LHE file ", fileURLs[curIndex - 1]);
528  return std::shared_ptr<LHEEvent>();
529  }
530  break;
531 
532  case XMLHandler::kHeader:
533  break;
534 
535  case XMLHandler::kInit:
536  {
537  std::istringstream data;
538  data.str(handler->buffer);
539  handler->buffer.clear();
540 
541  curRunInfo.reset(new LHERunInfo(data));
542 
543  std::for_each(handler->headers.begin(),
544  handler->headers.end(),
545  boost::bind(&LHERunInfo::addHeader,
546  curRunInfo.get(), _1));
547  handler->headers.clear();
548  }
549  break;
550 
552  break;
553 
554  case XMLHandler::kEvent:
555  {
556  if (!curRunInfo.get())
557  throw cms::Exception("InvalidState")
558  << "Got LHE event without"
559  " initialization." << std::endl;
560 
561  if (firstEvent > 0) {
562  firstEvent--;
563  continue;
564  }
565 
566  if (maxEvents == 0)
567  return std::shared_ptr<LHEEvent>();
568  else if (maxEvents > 0)
569  maxEvents--;
570 
571  std::istringstream data;
572  data.str(handler->buffer);
573  handler->buffer.clear();
574 
575  std::shared_ptr<LHEEvent> lheevent;
576  lheevent.reset(new LHEEvent(curRunInfo, data));
577  const XMLHandler::wgt_info& info = handler->weightsinevent;
578  for( size_t i=0; i< info.size(); ++i ) {
579  double num = -1.0;
580  sscanf(info[i].second.c_str(),"%le",&num);
581  lheevent->addWeight(gen::WeightsInfo(info[i].first,num));
582  }
583  lheevent->setNpLO(handler->npLO);
584  lheevent->setNpNLO(handler->npNLO);
585  lheevent->setEvtNum(handler->evtnum);
586  handler->evtnum = -1;
587  //fill scales
588  if (!handler->scales.empty()) {
589  lheevent->setScales(handler->scales);
590  }
591  return lheevent;
592  }
593  }
594  }
595 
596  return std::shared_ptr<LHEEvent>();
597  }
598 
599 } // namespace lhef
600 
std::shared_ptr< void > platform
Definition: LHEReader.h:44
type
Definition: HCALResponse.h:21
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:69
static const TGPicture * info(bool iBackgroundIsBlack)
LHEReader(const edm::ParameterSet &params)
Definition: LHEReader.cc:461
static void logFileAction(char const *msg, std::string const &fileName)
Definition: LHEReader.cc:37
static void fillHeader(LHERunInfo::Header &header, const char *data, int len=-1)
Definition: LHEReader.cc:169
std::unique_ptr< std::istream > fileStream
Definition: LHEReader.cc:95
void FlushMessageLog()
StringSource(const std::string &inputs)
Definition: LHEReader.cc:78
void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override
Definition: LHEReader.cc:300
const wgt_info & weightInfo() const
Definition: LHEReader.cc:124
#define nullptr
const std::vector< std::string > fileURLs
Definition: LHEReader.h:31
static bool isSpace(XMLCh ch)
Definition: XMLUtils.h:89
void characters(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:412
Definition: weight.py:1
std::vector< LHERunInfo::Header > headers
Definition: LHEReader.cc:151
U second(std::pair< T, U > const &p)
static const StorageFactory * get(void)
void addHeader(const Header &header)
Definition: LHERunInfo.h:63
std::unique_ptr< StorageWrap > fileStream
Definition: LHEReader.cc:73
void addLine(const std::string &line)
static bool isAllSpaces(const XMLCh *str, unsigned int length)
Definition: XMLUtils.h:85
const std::string strName
Definition: LHEReader.h:34
std::vector< std::pair< std::string, std::string > > wgt_info
Definition: LHEReader.cc:100
#define end
Definition: vmac.h:39
Definition: value.py:1
static std::shared_ptr< void > platformHandle()
Definition: XMLUtils.h:46
std::unique_ptr< DOMImplementation > impl
Definition: LHEReader.cc:143
std::unique_ptr< XMLDocument > curDoc
Definition: LHEReader.h:41
std::vector< DOMElement * > xmlNodes
Definition: LHEReader.cc:149
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:19
void startElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname, const Attributes &attributes) override
Definition: LHEReader.cc:186
std::unique_ptr< Source > curSource
Definition: LHEReader.h:40
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:91
tuple msg
Definition: mps_check.py:285
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
void comment(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:445
virtual XMLDocument * createReader(XMLDocument::Handler &handler)=0
unsigned int firstEvent
Definition: LHEReader.h:35
std::shared_ptr< LHERunInfo > curRunInfo
Definition: LHEReader.h:42
std::vector< float > scales
Definition: LHEReader.cc:155
std::unique_ptr< XMLHandler > handler
Definition: LHEReader.h:43
std::shared_ptr< LHEEvent > next(bool *newFileOpened=nullptr)
Definition: LHEReader.cc:495
FileSource(const std::string &fileURL)
Definition: LHEReader.cc:52
static void attributesToDom(DOMElement *dom, const Attributes &attributes)
Definition: LHEReader.cc:159
unsigned int curIndex
Definition: LHEReader.h:37
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1
std::unique_ptr< Storage > open(const std::string &url, int mode=IOFlags::OpenRead) const
#define comment(par)
Definition: vmac.h:163