CMS 3D CMS Logo

LHEReader.cc
Go to the documentation of this file.
1 #include <algorithm>
2 #include <iomanip>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <cstring>
7 #include <string>
8 #include <vector>
9 #include <cstdio>
10 
11 #include <boost/bind.hpp>
12 
13 #include <xercesc/sax2/Attributes.hpp>
14 #include <xercesc/dom/DOM.hpp>
15 
20 
24 
28 
29 #include "XMLUtils.h"
30 
31 #include "boost/lexical_cast.hpp"
32 
34 
35 namespace lhef {
36 
// Emit one "fileAction" record through edm::LogAbsolute consisting of the
// current edm::TimeOfDay(), the caller-supplied message and the file name.
// NOTE(review): listing line 39 is absent from this dump, so the function may
// do more than what is visible here.
37  static void logFileAction(char const* msg, std::string const& fileName) {
38  edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay() << msg << fileName;
40  }
41 
42 
// Body of the input-source base class (its header line is not part of this
// listing). FileSource and StringSource below override the destructor.
44  public:
45  Source() {}
    // Virtual so that derived sources are destroyed correctly through a
    // base-class pointer.
46  virtual ~Source() {}
48 };
49 
// Source that reads an LHE document from a file URL opened through
// StorageFactory (the class header line is not part of this listing).
51  public:
    // Open `fileURL` and wrap the resulting storage in a StorageWrap held by
    // fileStream. Throws cms::Exception("FileOpenError") if open() yields a
    // null handle.
    // NOTE(review): the remaining arguments of open() are on a listing line
    // that is missing from this dump.
52  FileSource(const std::string &fileURL)
53  {
54  auto storage =
55  StorageFactory::get()->open(fileURL,
57 
58  if (!storage)
59  throw cms::Exception("FileOpenError")
60  << "Could not open LHE file \""
61  << fileURL << "\" for reading"
62  << std::endl;
63 
64  fileStream.reset(new StorageWrap(std::move(storage)));
65  }
66 
67  ~FileSource() override {}
68 
    // Body of createReader (signature line missing from this dump): returns a
    // newly allocated XMLDocument built from fileStream and the given handler.
    // LHEReader::next() stores the result in its curDoc unique_ptr.
70  { return new XMLDocument(fileStream, handler); }
71 
72  private:
    // Wrapped storage handle created by the constructor.
73  std::unique_ptr<StorageWrap> fileStream;
74 };
75 
// Source that reads an LHE document from an in-memory string (the class
// header and constructor signature lines are not part of this listing).
77  public:
    // Constructor body: rejects an empty input string with
    // cms::Exception("StreamOpenError"), otherwise copies `inputs` into a
    // std::stringstream that fileStream takes ownership of.
    // NOTE(review): the error message below ends with an unbalanced '"'.
79  {
80  if (inputs.empty())
81  throw cms::Exception("StreamOpenError")
82  << "Empty LHE file string name \""
83  << std::endl;
84 
85  std::stringstream * tmpis = new std::stringstream(inputs);
86  fileStream.reset(tmpis);
87  }
88 
89  ~StringSource() override {}
90 
    // Body of createReader (signature line missing from this dump): returns a
    // newly allocated XMLDocument built from fileStream and the given handler.
92  { return new XMLDocument(fileStream, handler); }
93 
94  private:
    // Owns the stringstream created by the constructor.
95  std::unique_ptr<std::istream> fileStream;
96 };
97 
// SAX handler that turns the LHE XML stream into header, init and event
// payloads consumed by LHEReader::next(). The class header line, the
// constructor signature and a few member declarations are missing from this
// dump.
99  public:
    // (attribute value, text content) pairs collected from the weight elements
    // of one event; filled in endElement().
100  typedef std::vector<std::pair<std::string,std::string> > wgt_info;
    // Constructor initialiser list: no DOM implementation or documents yet,
    // no pending object, and npLO/npNLO at the -99 "not set" default.
102  impl(nullptr),
103  gotObject(kNone), mode(kNone),
104  xmlHeader(nullptr), xmlEvent(nullptr), headerOk(false), npLO(-99), npNLO(-99) {}
    // Releases whichever DOM documents are still alive.
105  ~XMLHandler() override
106  { if (xmlHeader) xmlHeader->release();
107  if (xmlEvent) xmlEvent->release(); }
108 
    // Kind of top-level object being parsed (`mode`) or just completed
    // (`gotObject`). Some enumerators are on listing lines missing from this
    // dump; the code in this file also uses kHeader and kInit.
109  enum Object {
110  kNone = 0,
114  kEvent
115  };
116 
    // Return the handler to its initial state; LHEReader::next() calls this
    // before it creates the reader for a newly opened source.
117  void reset() {
118  headerOk = false;
119  weightsinevent.clear();
120  gotObject = kNone;
121  mode = kNone;
122  }
123 
    // Weights gathered for the most recently completed event.
124  const wgt_info& weightInfo() const {return weightsinevent;}
125 
126  protected:
127  void startElement(const XMLCh *const uri,
128  const XMLCh *const localname,
129  const XMLCh *const qname,
130  const Attributes &attributes) override;
131 
132  void endElement(const XMLCh *const uri,
133  const XMLCh *const localname,
134  const XMLCh *const qname) override;
135 
136  void characters (const XMLCh *const chars, const XMLSize_t length) override;
137  void comment (const XMLCh *const chars, const XMLSize_t length) override;
138 
139  private:
140  friend class LHEReader;
141 
    // When true, event content is not stored in the event DOM; set by
    // LHEReader::next() while events remain to be skipped.
142  bool skipEvent = false;
    // DOM implementation obtained lazily from DOMImplementationRegistry.
143  std::unique_ptr<DOMImplementation> impl;
    // DOM documents for the <header> block and for the current <event>.
147  DOMDocument *xmlHeader;
148  DOMDocument *xmlEvent;
    // Element stacks/lists for the header DOM and the event DOM; element 0 is
    // the respective document element.
149  std::vector<DOMElement*> xmlNodes,xmlEventNodes;
    // True once the <LesHouchesEvents> root tag has been seen.
150  bool headerOk;
    // Headers accumulated from the <header> block and from comments; handed
    // to LHERunInfo by LHEReader::next().
151  std::vector<LHERunInfo::Header> headers;
152  wgt_info weightsinevent;
    // Values of the npLO / npNLO attributes of <event>, or -99 when absent.
153  int npLO;
154  int npNLO;
    // Attribute values of the <scales> tag of the current event.
155  std::vector<float> scales;
156 };
157 
158 static void attributesToDom(DOMElement *dom, const Attributes &attributes)
159 {
160  for(unsigned int i = 0; i < attributes.getLength(); i++) {
161  const XMLCh *name = attributes.getQName(i);
162  const XMLCh *value = attributes.getValue(i);
163 
164  dom->setAttribute(name, value);
165  }
166 }
167 
168 static void fillHeader(LHERunInfo::Header &header, const char *data,
169  int len = -1)
170 {
171  const char *end = len >= 0 ? (data + len) : nullptr;
172  while(*data && (!end || data < end)) {
173  std::size_t len = std::strcspn(data, "\r\n");
174  if (end && data + len > end)
175  len = end - data;
176  if (data[len] == '\r' && data[len + 1] == '\n')
177  len += 2;
178  else if (data[len])
179  len++;
180  header.addLine(std::string(data, len));
181  data += len;
182  }
183 }
184 
185 void LHEReader::XMLHandler::startElement(const XMLCh *const uri,
186  const XMLCh *const localname,
187  const XMLCh *const qname,
188  const Attributes &attributes)
189 {
190  std::string name((const char*)XMLSimpleStr(qname));
191 
192  if (!headerOk) {
193  if (name != "LesHouchesEvents")
194  throw cms::Exception("InvalidFormat")
195  << "LHE file has invalid header" << std::endl;
196  headerOk = true;
197  return;
198  }
199 
200  if (mode == kHeader) {
201  DOMElement *elem = xmlHeader->createElement(qname);
202  attributesToDom(elem, attributes);
203  xmlNodes.back()->appendChild(elem);
204  xmlNodes.push_back(elem);
205  return;
206  } else if ( mode == kEvent ) {
207 
208  if (skipEvent) {return;}
209 
210  DOMElement *elem = xmlEvent->createElement(qname);
211  attributesToDom(elem, attributes);
212 
213  //TODO this is a hack (even more than the rest of this class)
214  if( name == "rwgt" ) {
215  xmlEventNodes[0]->appendChild(elem);
216  } else if (name == "wgt") {
217  xmlEventNodes[1]->appendChild(elem);
218  }
219  else if (name == "scales") {
220  for (XMLSize_t iscale=0; iscale<attributes.getLength(); ++iscale) {
221  int ipart = 0;
222  const char *scalename = XMLSimpleStr(attributes.getQName(iscale));
223  int nmatch = sscanf(scalename,"pt_clust_%d",&ipart);
224 
225  if (nmatch!=1) {
226  edm::LogError("Generator|LHEInterface")
227  << "invalid attribute in <scales> tag"
228  << std::endl;
229  }
230 
231  float scaleval;
232  const char *scalevalstr = XMLSimpleStr(attributes.getValue(iscale));
233  sscanf(scalevalstr,"%e",&scaleval);
234 
235  scales.push_back(scaleval);
236  }
237  }
238  xmlEventNodes.push_back(elem);
239  return;
240  } else if (mode == kInit) {
241  //skip unknown tags in init block as well
242  return;
243  } else if (mode != kNone) {
244  throw cms::Exception("InvalidFormat")
245  << "LHE file has invalid format" << std::endl;
246  }
247 
248  if (name == "header") {
249  if (!impl)
250  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
251 
252  xmlHeader = impl->createDocument(nullptr, qname, nullptr);
253  xmlNodes.resize(1);
254  xmlNodes[0] = xmlHeader->getDocumentElement();
255  mode = kHeader;
256  } if (name == "init") {
257  mode = kInit;
258  } else if (name == "event") {
259  if (!skipEvent)
260  {
261  if (!impl)
262  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
263 
264  if(xmlEvent) xmlEvent->release();
265  xmlEvent = impl->createDocument(nullptr, qname, nullptr);
266  weightsinevent.resize(0);
267  scales.clear();
268 
269  npLO = -99;
270  npNLO = -99;
271  const XMLCh *npLOval = attributes.getValue(XMLString::transcode("npLO"));
272  if (npLOval) {
273  const char *npLOs = XMLSimpleStr(npLOval);
274  sscanf(npLOs,"%d",&npLO);
275  }
276  const XMLCh *npNLOval = attributes.getValue(XMLString::transcode("npNLO"));
277  if (npNLOval) {
278  const char *npNLOs = XMLSimpleStr(npNLOval);
279  sscanf(npNLOs,"%d",&npNLO);
280  }
281 
282  xmlEventNodes.resize(1);
283  xmlEventNodes[0] = xmlEvent->getDocumentElement();
284  }
285  mode = kEvent;
286  }
287 
288  if (mode == kNone)
289  throw cms::Exception("InvalidFormat")
290  << "LHE file has invalid format" << std::endl;
291 
292  buffer.clear();
293 }
294 
// SAX callback invoked for every closing tag. Nothing happens while mode is
// kNone. Otherwise:
//  - closing a tag nested inside <header> pops one entry from xmlNodes;
//  - closing <header> itself serialises every child of the header root and
//    stores it as one entry of `headers`, then releases the header DOM;
//  - closing <event> either just reports the event (skipEvent) or collects
//    weights into weightsinevent and text into `buffer`;
//  - any other closing tag seen in event mode is ignored.
// On the paths that do not return early, the finished object kind is
// published in gotObject and mode goes back to kNone; a still-pending
// gotObject raises cms::Exception("InvalidState").
295 void LHEReader::XMLHandler::endElement(const XMLCh *const uri,
296  const XMLCh *const localname,
297  const XMLCh *const qname)
298 {
299  std::string name((const char*)XMLSimpleStr(qname));
300 
301  if (mode) {
302 
303  if (mode == kHeader && xmlNodes.size() > 1) {
    // Closing a tag nested inside <header>: drop it from the stack.
304  xmlNodes.resize(xmlNodes.size() - 1);
305  return;
306  } else if (mode == kHeader) {
    // Closing <header> itself: convert each child node to a header entry.
307  std::unique_ptr<DOMLSSerializer> writer(impl->createLSSerializer());
308  std::unique_ptr<DOMLSOutput> outputDesc(impl->createLSOutput());
309  assert(outputDesc.get());
310  outputDesc->setEncoding(XMLUniStr("UTF-8"));
311 
312  for(DOMNode *node = xmlNodes[0]->getFirstChild();
313  node; node = node->getNextSibling()) {
    // Local serialised text of this node (a local named `buffer`, distinct
    // from the member `buffer` used further down).
314  XMLSimpleStr buffer(writer->writeToString(node));
315 
    // NOTE(review): the declaration of `type` is on a listing line missing
    // from this dump.
    // [p, q) delimits the part of the serialised text passed to fillHeader.
317  const char *p, *q;
318  DOMElement *elem;
319 
320  switch(node->getNodeType()) {
321  case DOMNode::ELEMENT_NODE:
    // Element: type is the tag name, payload is the text between the first
    // '>' and the last '<'.
322  elem = static_cast<DOMElement*>(node);
323  type = (const char*)XMLSimpleStr(
324  elem->getTagName());
325  p = std::strchr((const char*)buffer,
326  '>') + 1;
327  q = std::strrchr(p, '<');
328  break;
329  case DOMNode::COMMENT_NODE:
    // Comment: empty type, payload skips the first 4 and the last 3
    // characters (presumably the "<!--" and "-->" delimiters).
330  type = "";
331  p = buffer + 4;
332  q = buffer + strlen(buffer) - 3;
333  break;
334  default:
    // Anything else: type "<>", leading whitespace skipped, and nodes that
    // are whitespace-only are not recorded.
335  type = "<>";
336  p = buffer +
337  std::strspn(buffer, " \t\r\n");
338  if (!*p)
339  continue;
340  q = p + strlen(p);
341  }
    // NOTE(review): the declaration of `header` is on a listing line missing
    // from this dump.
343  fillHeader(header, p, q - p);
344  headers.push_back(header);
345  }
346 
347  xmlHeader->release();
348  xmlHeader = nullptr;
349  }
350  else if (name == "event" &&
351  mode == kEvent &&
352  (skipEvent || (!xmlEventNodes.empty()))) { // handling of weights in LHE file
353 
354  if (skipEvent)
355  {
    // Skipped event: report it without touching the event DOM. This path
    // bypasses the pending-object check at the end of the function.
356  gotObject = mode;
357  mode = kNone;
358  return;
359  }
360 
361  for(DOMNode *node = xmlEventNodes[0]->getFirstChild();
362  node; node = node->getNextSibling()) {
363  switch( node->getNodeType() ) {
364  case DOMNode::ELEMENT_NODE: // rwgt
    // For an element child of the event root, walk the children of
    // xmlEventNodes[1] and record (first attribute value, first child text).
    // NOTE(review): xmlEventNodes[1] is indexed without a size check, and
    // getAttributes()/getFirstChild() are dereferenced before the node-type
    // test below; a non-element or empty child would presumably yield a null
    // pointer here - confirm.
365  for(DOMNode *rwgt = xmlEventNodes[1]->getFirstChild();
366  rwgt; rwgt = rwgt->getNextSibling()) {
367  DOMNode* attr = rwgt->getAttributes()->item(0);
368  XMLSimpleStr atname(attr->getNodeValue());
369  XMLSimpleStr weight(rwgt->getFirstChild()->getNodeValue());
370  switch( rwgt->getNodeType() ) {
371  case DOMNode::ELEMENT_NODE:
372  weightsinevent.push_back(std::make_pair((const char*)atname,
373  (const char*)weight));
374  break;
375  default:
376  break;
377  }
378  }
379  break;
380  case DOMNode::TEXT_NODE: // event information
381  {
    // Text directly under <event> is accumulated in the member `buffer`,
    // which LHEReader::next() later feeds to LHEEvent.
382  XMLSimpleStr data(node->getNodeValue());
383  buffer.append(data);
384  }
385  break;
386  default:
387  break;
388  }
389  }
390  }
391  else if (mode == kEvent) {
392  //skip unknown tags
393  return;
394  }
395 
    // A previously completed object must have been consumed before another
    // one can be reported.
396  if (gotObject != kNone)
397  throw cms::Exception("InvalidState")
398  << "Unexpected pileup in"
399  " LHEReader::XMLHandler::endElement"
400  << std::endl;
401 
402  gotObject = mode;
403  mode = kNone;
404  }
405 }
406 
407 void LHEReader::XMLHandler::characters(const XMLCh *const data_,
408  const XMLSize_t length)
409 {
410  if (mode == kHeader) {
411  DOMText *text = xmlHeader->createTextNode(data_);
412  xmlNodes.back()->appendChild(text);
413  return;
414  }
415 
416  if (XMLSimpleStr::isAllSpaces(data_, length))
417  return;
418 
419  unsigned int offset = 0;
420  while(offset < length && XMLSimpleStr::isSpace(data_[offset]))
421  offset++;
422 
423  if( mode == kEvent ) {
424  if (!skipEvent)
425  {
426  DOMText *text = xmlEvent->createTextNode(data_+offset);
427  xmlEventNodes.back()->appendChild(text);
428  }
429  return;
430  }
431 
432  if (mode == kNone)
433  throw cms::Exception("InvalidFormat")
434  << "LHE file has invalid format" << std::endl;
435 
436  XMLSimpleStr data(data_ + offset);
437  buffer.append(data);
438 }
439 
// SAX callback for XML comments. Inside <header> the comment becomes a
// comment node of the header DOM; anywhere else its text is split into lines
// by fillHeader() and stored as a separate entry of `headers`.
440 void LHEReader::XMLHandler::comment(const XMLCh *const data_,
441  const XMLSize_t length)
442 {
443  if (mode == kHeader) {
444  DOMComment *comment = xmlHeader->createComment(data_);
445  xmlNodes.back()->appendChild(comment);
446  return;
447  }
448 
449  XMLSimpleStr data(data_);
450 
    // NOTE(review): the declaration of `header` is on a listing line missing
    // from this dump.
452  fillHeader(header, data);
453  headers.push_back(header);
454 }
455 
// Initialiser list and body of the ParameterSet constructor (signature line
// missing from this dump). Reads the untracked parameters "fileNames",
// "skipEvents" (default 0) and "limitEvents" (default -1), and creates a
// fresh XMLHandler.
457  fileURLs(params.getUntrackedParameter< std::vector<std::string> >("fileNames")),
458  strName(""),
459  firstEvent(params.getUntrackedParameter<unsigned int>("skipEvents", 0)),
460  maxEvents(params.getUntrackedParameter<int>("limitEvents", -1)),
461  curIndex(0), handler(new XMLHandler())
462 {
463 }
464 
465 LHEReader::LHEReader(const std::vector<std::string> &fileNames,
466  unsigned int firstEvent) :
467  fileURLs(fileNames), strName(""), firstEvent(firstEvent), maxEvents(-1),
468  curIndex(0), handler(new XMLHandler())
469 {
470 }
471 
// Tail of the string-input constructor (first signature line missing from
// this dump): stores the LHE text in strName, skips `firstEvent` leading
// events, and applies no event limit (maxEvents == -1).
473  unsigned int firstEvent) :
474  strName(inputs), firstEvent(firstEvent), maxEvents(-1),
475  curIndex(0), handler(new XMLHandler())
476 {
477 }
478 
// Destructor body (signature line missing from this dump).
// NOTE(review): handler, curDoc and curSource are std::unique_ptr members, and
// unique_ptr::release() gives up ownership WITHOUT destroying the object. As
// written the three objects are never deleted; if the intent described below
// is to destroy them here, reset() would be required - confirm.
480 {
481  // Explicitly release "orphaned" resources
482  // that were created through DOM implementation
483  // createXXXX factory method *before* last
484  // XMLPlatformUtils::Terminate is called.
485  handler.release();
486  curDoc.release();
487  curSource.release();
488 }
489 
// Return the next event, or an empty pointer when the current document is
// exhausted, the event limit is reached, or no input remains.
//
// newFileOpened : optional out-flag, set to true whenever a new file from
//                 fileURLs is opened during this call (never reset to false
//                 here).
//
// Throws cms::Exception("InvalidState") if an event arrives before any <init>
// block has produced a run info.
490  std::shared_ptr<LHEEvent> LHEReader::next(bool* newFileOpened)
491  {
492  while(curDoc.get() || curIndex < fileURLs.size() || (fileURLs.empty() && !strName.empty() ) ) {
493  if (!curDoc.get()) {
    // No document open: open the next file, or the in-memory string.
494  if(!platform) {
495  //If we read multiple files, the XercesPlatform must live longer than any one
496  // XMLDocument.
    // NOTE(review): the statement that initialises `platform` is on a listing
    // line missing from this dump.
498  }
499  if ( !fileURLs.empty() ) {
500  logFileAction(" Initiating request to open LHE file ", fileURLs[curIndex]);
501  curSource.reset(new FileSource(fileURLs[curIndex]));
502  logFileAction(" Successfully opened LHE file ", fileURLs[curIndex]);
503  if ( newFileOpened != nullptr ) *newFileOpened = true;
504  ++curIndex;
505  } else if ( !strName.empty() ) {
506  curSource.reset(new StringSource(strName));
507  }
508  handler->reset();
509  curDoc.reset(curSource->createReader(*handler));
510  curRunInfo.reset();
511  }
    // Tell the handler not to build event DOMs while events are being skipped.
512  handler->skipEvent = firstEvent > 0;
513 
    // Consume whatever object the handler completed during the last parse step.
514  XMLHandler::Object event = handler->gotObject;
515  handler->gotObject = XMLHandler::kNone;
516 
517 
518  switch(event) {
519  case XMLHandler::kNone:
    // Nothing pending: advance the parser; a false return closes the document.
    // NOTE(review): on the string-source path fileURLs is empty and curIndex
    // is 0, so fileURLs[curIndex - 1] looks out of range; the loop condition
    // also seems to reopen the string on the following call - confirm.
520  if (!curDoc->parse()) {
521  curDoc.reset();
522  logFileAction(" Closed LHE file ", fileURLs[curIndex - 1]);
523  return std::shared_ptr<LHEEvent>();
524  }
525  break;
526 
527  case XMLHandler::kHeader:
528  break;
529 
530  case XMLHandler::kInit:
531  {
    // <init> finished: build the run info from the buffered text and attach
    // all headers collected so far.
532  std::istringstream data;
533  data.str(handler->buffer);
534  handler->buffer.clear();
535 
536  curRunInfo.reset(new LHERunInfo(data));
537 
538  std::for_each(handler->headers.begin(),
539  handler->headers.end(),
540  boost::bind(&LHERunInfo::addHeader,
541  curRunInfo.get(), _1));
542  handler->headers.clear();
543  }
544  break;
545 
    // NOTE(review): the case label belonging to this break is on a listing
    // line missing from this dump.
547  break;
548 
549  case XMLHandler::kEvent:
550  {
551  if (!curRunInfo.get())
552  throw cms::Exception("InvalidState")
553  << "Got LHE event without"
554  " initialization." << std::endl;
555 
    // Skip the requested number of leading events.
556  if (firstEvent > 0) {
557  firstEvent--;
558  continue;
559  }
560 
    // maxEvents counts down the remaining events; a negative value means no
    // limit.
561  if (maxEvents == 0)
562  return std::shared_ptr<LHEEvent>();
563  else if (maxEvents > 0)
564  maxEvents--;
565 
566  std::istringstream data;
567  data.str(handler->buffer);
568  handler->buffer.clear();
569 
570  std::shared_ptr<LHEEvent> lheevent;
571  lheevent.reset(new LHEEvent(curRunInfo, data));
    // Attach the per-event weights; a weight whose text does not parse keeps
    // the -1.0 default.
572  const XMLHandler::wgt_info& info = handler->weightsinevent;
573  for( size_t i=0; i< info.size(); ++i ) {
574  double num = -1.0;
575  sscanf(info[i].second.c_str(),"%le",&num);
576  lheevent->addWeight(gen::WeightsInfo(info[i].first,num));
577  }
578  lheevent->setNpLO(handler->npLO);
579  lheevent->setNpNLO(handler->npNLO);
580  //fill scales
581  if (!handler->scales.empty()) {
582  lheevent->setScales(handler->scales);
583  }
584  return lheevent;
585  }
586  }
587  }
588 
589  return std::shared_ptr<LHEEvent>();
590  }
591 
592 } // namespace lhef
593 
std::shared_ptr< void > platform
Definition: LHEReader.h:44
type
Definition: HCALResponse.h:21
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:69
static const TGPicture * info(bool iBackgroundIsBlack)
LHEReader(const edm::ParameterSet &params)
Definition: LHEReader.cc:456
static void logFileAction(char const *msg, std::string const &fileName)
Definition: LHEReader.cc:37
static void fillHeader(LHERunInfo::Header &header, const char *data, int len=-1)
Definition: LHEReader.cc:168
std::unique_ptr< std::istream > fileStream
Definition: LHEReader.cc:95
void FlushMessageLog()
StringSource(const std::string &inputs)
Definition: LHEReader.cc:78
void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override
Definition: LHEReader.cc:295
const wgt_info & weightInfo() const
Definition: LHEReader.cc:124
#define nullptr
const std::vector< std::string > fileURLs
Definition: LHEReader.h:31
static bool isSpace(XMLCh ch)
Definition: XMLUtils.h:89
void characters(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:407
Definition: weight.py:1
std::vector< LHERunInfo::Header > headers
Definition: LHEReader.cc:151
U second(std::pair< T, U > const &p)
static const StorageFactory * get(void)
void addHeader(const Header &header)
Definition: LHERunInfo.h:63
std::unique_ptr< StorageWrap > fileStream
Definition: LHEReader.cc:73
void addLine(const std::string &line)
static bool isAllSpaces(const XMLCh *str, unsigned int length)
Definition: XMLUtils.h:85
const std::string strName
Definition: LHEReader.h:34
std::vector< std::pair< std::string, std::string > > wgt_info
Definition: LHEReader.cc:100
#define end
Definition: vmac.h:39
Definition: value.py:1
static std::shared_ptr< void > platformHandle()
Definition: XMLUtils.h:46
std::unique_ptr< DOMImplementation > impl
Definition: LHEReader.cc:143
std::unique_ptr< XMLDocument > curDoc
Definition: LHEReader.h:41
std::vector< DOMElement * > xmlNodes
Definition: LHEReader.cc:149
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:19
void startElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname, const Attributes &attributes) override
Definition: LHEReader.cc:185
std::unique_ptr< Source > curSource
Definition: LHEReader.h:40
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:91
tuple msg
Definition: mps_check.py:285
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
void comment(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:440
virtual XMLDocument * createReader(XMLDocument::Handler &handler)=0
unsigned int firstEvent
Definition: LHEReader.h:35
std::shared_ptr< LHERunInfo > curRunInfo
Definition: LHEReader.h:42
std::vector< float > scales
Definition: LHEReader.cc:155
std::unique_ptr< XMLHandler > handler
Definition: LHEReader.h:43
std::shared_ptr< LHEEvent > next(bool *newFileOpened=nullptr)
Definition: LHEReader.cc:490
FileSource(const std::string &fileURL)
Definition: LHEReader.cc:52
static void attributesToDom(DOMElement *dom, const Attributes &attributes)
Definition: LHEReader.cc:158
unsigned int curIndex
Definition: LHEReader.h:37
def move(src, dest)
Definition: eostools.py:511
Definition: event.py:1
std::unique_ptr< Storage > open(const std::string &url, int mode=IOFlags::OpenRead) const
#define comment(par)
Definition: vmac.h:163