CMS 3D CMS Logo

LHEReader.cc
Go to the documentation of this file.
1 #include <algorithm>
2 #include <iomanip>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <cstring>
7 #include <string>
8 #include <vector>
9 #include <cstdio>
10 
11 #include <boost/bind.hpp>
12 
13 #include <xercesc/sax2/Attributes.hpp>
14 #include <xercesc/dom/DOM.hpp>
15 
20 
24 
28 
29 #include "XMLUtils.h"
30 
31 #include "boost/lexical_cast.hpp"
32 
34 
35 namespace lhef {
36 
37  static void logFileAction(char const* msg, std::string const& fileName) {
38  edm::LogAbsolute("fileAction") << std::setprecision(0) << edm::TimeOfDay() << msg << fileName;
40  }
41 
42 
44  public:
45  Source() {}
46  virtual ~Source() {}
48 };
49 
51  public:
52  FileSource(const std::string &fileURL)
53  {
54  auto storage =
55  StorageFactory::get()->open(fileURL,
57 
58  if (!storage)
59  throw cms::Exception("FileOpenError")
60  << "Could not open LHE file \""
61  << fileURL << "\" for reading"
62  << std::endl;
63 
64  fileStream.reset(new StorageWrap(std::move(storage)));
65  }
66 
68 
70  { return new XMLDocument(fileStream, handler); }
71 
72  private:
73  std::auto_ptr<StorageWrap> fileStream;
74 };
75 
77  public:
79  {
80  if (inputs == "")
81  throw cms::Exception("StreamOpenError")
82  << "Empty LHE file string name \""
83  << std::endl;
84 
85  std::stringstream * tmpis = new std::stringstream(inputs);
86  fileStream.reset(tmpis);
87  }
88 
90 
92  { return new XMLDocument(fileStream, handler); }
93 
94  private:
95  std::auto_ptr<std::istream> fileStream;
96 };
97 
99  public:
100  typedef std::vector<std::pair<std::string,std::string> > wgt_info;
102  impl(nullptr),
103  gotObject(kNone), mode(kNone),
104  xmlHeader(0), xmlEvent(0), headerOk(false), npLO(-99), npNLO(-99) {}
106  { if (xmlHeader) xmlHeader->release();
107  if (xmlEvent) xmlEvent->release(); }
108 
109  enum Object {
110  kNone = 0,
114  kEvent
115  };
116 
117  void reset() { headerOk = false; weightsinevent.clear();}
118 
119  const wgt_info& weightInfo() const {return weightsinevent;}
120 
121  protected:
122  void startElement(const XMLCh *const uri,
123  const XMLCh *const localname,
124  const XMLCh *const qname,
125  const Attributes &attributes) override;
126 
127  void endElement(const XMLCh *const uri,
128  const XMLCh *const localname,
129  const XMLCh *const qname) override;
130 
131  virtual void characters (const XMLCh *const chars, const XMLSize_t length) override;
132  virtual void comment (const XMLCh *const chars, const XMLSize_t length) override;
133 
134  private:
135  friend class LHEReader;
136 
137  bool skipEvent = false;
138  std::unique_ptr<DOMImplementation> impl;
142  DOMDocument *xmlHeader;
143  DOMDocument *xmlEvent;
144  std::vector<DOMElement*> xmlNodes,xmlEventNodes;
145  bool headerOk;
146  std::vector<LHERunInfo::Header> headers;
147  wgt_info weightsinevent;
148  int npLO;
149  int npNLO;
150  std::vector<float> scales;
151 };
152 
153 static void attributesToDom(DOMElement *dom, const Attributes &attributes)
154 {
155  for(unsigned int i = 0; i < attributes.getLength(); i++) {
156  const XMLCh *name = attributes.getQName(i);
157  const XMLCh *value = attributes.getValue(i);
158 
159  dom->setAttribute(name, value);
160  }
161 }
162 
163 static void fillHeader(LHERunInfo::Header &header, const char *data,
164  int len = -1)
165 {
166  const char *end = len >= 0 ? (data + len) : 0;
167  while(*data && (!end || data < end)) {
168  std::size_t len = std::strcspn(data, "\r\n");
169  if (end && data + len > end)
170  len = end - data;
171  if (data[len] == '\r' && data[len + 1] == '\n')
172  len += 2;
173  else if (data[len])
174  len++;
175  header.addLine(std::string(data, len));
176  data += len;
177  }
178 }
179 
180 void LHEReader::XMLHandler::startElement(const XMLCh *const uri,
181  const XMLCh *const localname,
182  const XMLCh *const qname,
183  const Attributes &attributes)
184 {
185  std::string name((const char*)XMLSimpleStr(qname));
186 
187  if (!headerOk) {
188  if (name != "LesHouchesEvents")
189  throw cms::Exception("InvalidFormat")
190  << "LHE file has invalid header" << std::endl;
191  headerOk = true;
192  return;
193  }
194 
195  if (mode == kHeader) {
196  DOMElement *elem = xmlHeader->createElement(qname);
197  attributesToDom(elem, attributes);
198  xmlNodes.back()->appendChild(elem);
199  xmlNodes.push_back(elem);
200  return;
201  } else if ( mode == kEvent ) {
202 
203  if (skipEvent) {return;}
204 
205  DOMElement *elem = xmlEvent->createElement(qname);
206  attributesToDom(elem, attributes);
207 
208  //TODO this is a hack (even more than the rest of this class)
209  if( name == "rwgt" ) {
210  xmlEventNodes[0]->appendChild(elem);
211  } else if (name == "wgt") {
212  xmlEventNodes[1]->appendChild(elem);
213  }
214  else if (name == "scales") {
215  for (XMLSize_t iscale=0; iscale<attributes.getLength(); ++iscale) {
216  int ipart = 0;
217  const char *scalename = XMLSimpleStr(attributes.getQName(iscale));
218  int nmatch = sscanf(scalename,"pt_clust_%d",&ipart);
219 
220  if (nmatch!=1) {
221  edm::LogError("Generator|LHEInterface")
222  << "invalid attribute in <scales> tag"
223  << std::endl;
224  }
225 
226  float scaleval;
227  const char *scalevalstr = XMLSimpleStr(attributes.getValue(iscale));
228  sscanf(scalevalstr,"%e",&scaleval);
229 
230  scales.push_back(scaleval);
231  }
232  }
233  xmlEventNodes.push_back(elem);
234  return;
235  } else if (mode == kInit) {
236  //skip unknown tags in init block as well
237  return;
238  } else if (mode != kNone) {
239  throw cms::Exception("InvalidFormat")
240  << "LHE file has invalid format" << std::endl;
241  }
242 
243  if (name == "header") {
244  if (!impl)
245  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
246 
247  xmlHeader = impl->createDocument(0, qname, 0);
248  xmlNodes.resize(1);
249  xmlNodes[0] = xmlHeader->getDocumentElement();
250  mode = kHeader;
251  } if (name == "init") {
252  mode = kInit;
253  } else if (name == "event") {
254  if (!skipEvent)
255  {
256  if (!impl)
257  impl.reset(DOMImplementationRegistry::getDOMImplementation(XMLUniStr("Core")));
258 
259  if(xmlEvent) xmlEvent->release();
260  xmlEvent = impl->createDocument(0, qname, 0);
261  weightsinevent.resize(0);
262  scales.clear();
263 
264  npLO = -99;
265  npNLO = -99;
266  const XMLCh *npLOval = attributes.getValue(XMLString::transcode("npLO"));
267  if (npLOval) {
268  const char *npLOs = XMLSimpleStr(npLOval);
269  sscanf(npLOs,"%d",&npLO);
270  }
271  const XMLCh *npNLOval = attributes.getValue(XMLString::transcode("npNLO"));
272  if (npNLOval) {
273  const char *npNLOs = XMLSimpleStr(npNLOval);
274  sscanf(npNLOs,"%d",&npNLO);
275  }
276 
277  xmlEventNodes.resize(1);
278  xmlEventNodes[0] = xmlEvent->getDocumentElement();
279  }
280  mode = kEvent;
281  }
282 
283  if (mode == kNone)
284  throw cms::Exception("InvalidFormat")
285  << "LHE file has invalid format" << std::endl;
286 
287  buffer.clear();
288 }
289 
290 void LHEReader::XMLHandler::endElement(const XMLCh *const uri,
291  const XMLCh *const localname,
292  const XMLCh *const qname)
293 {
294  std::string name((const char*)XMLSimpleStr(qname));
295 
296  if (mode) {
297 
298  if (mode == kHeader && xmlNodes.size() > 1) {
299  xmlNodes.resize(xmlNodes.size() - 1);
300  return;
301  } else if (mode == kHeader) {
302  std::unique_ptr<DOMLSSerializer> writer(impl->createLSSerializer());
303  std::unique_ptr<DOMLSOutput> outputDesc(impl->createLSOutput());
304  assert(outputDesc.get());
305  outputDesc->setEncoding(XMLUniStr("UTF-8"));
306 
307  for(DOMNode *node = xmlNodes[0]->getFirstChild();
308  node; node = node->getNextSibling()) {
309  XMLSimpleStr buffer(writer->writeToString(node));
310 
312  const char *p, *q;
313  DOMElement *elem;
314 
315  switch(node->getNodeType()) {
316  case DOMNode::ELEMENT_NODE:
317  elem = static_cast<DOMElement*>(node);
318  type = (const char*)XMLSimpleStr(
319  elem->getTagName());
320  p = std::strchr((const char*)buffer,
321  '>') + 1;
322  q = std::strrchr(p, '<');
323  break;
324  case DOMNode::COMMENT_NODE:
325  type = "";
326  p = buffer + 4;
327  q = buffer + strlen(buffer) - 3;
328  break;
329  default:
330  type = "<>";
331  p = buffer +
332  std::strspn(buffer, " \t\r\n");
333  if (!*p)
334  continue;
335  q = p + strlen(p);
336  }
338  fillHeader(header, p, q - p);
339  headers.push_back(header);
340  }
341 
342  xmlHeader->release();
343  xmlHeader = 0;
344  }
345  else if (name == "event" &&
346  mode == kEvent &&
347  (skipEvent || (xmlEventNodes.size() >= 1))) { // handling of weights in LHE file
348 
349  if (skipEvent)
350  {
351  gotObject = mode;
352  mode = kNone;
353  return;
354  }
355 
356  for(DOMNode *node = xmlEventNodes[0]->getFirstChild();
357  node; node = node->getNextSibling()) {
358  switch( node->getNodeType() ) {
359  case DOMNode::ELEMENT_NODE: // rwgt
360  for(DOMNode *rwgt = xmlEventNodes[1]->getFirstChild();
361  rwgt; rwgt = rwgt->getNextSibling()) {
362  DOMNode* attr = rwgt->getAttributes()->item(0);
363  XMLSimpleStr atname(attr->getNodeValue());
364  XMLSimpleStr weight(rwgt->getFirstChild()->getNodeValue());
365  switch( rwgt->getNodeType() ) {
366  case DOMNode::ELEMENT_NODE:
367  weightsinevent.push_back(std::make_pair((const char*)atname,
368  (const char*)weight));
369  break;
370  default:
371  break;
372  }
373  }
374  break;
375  case DOMNode::TEXT_NODE: // event information
376  {
377  XMLSimpleStr data(node->getNodeValue());
378  buffer.append(data);
379  }
380  break;
381  default:
382  break;
383  }
384  }
385  }
386  else if (mode == kEvent) {
387  //skip unknown tags
388  return;
389  }
390 
391  if (gotObject != kNone)
392  throw cms::Exception("InvalidState")
393  << "Unexpected pileup in"
394  " LHEReader::XMLHandler::endElement"
395  << std::endl;
396 
397  gotObject = mode;
398  mode = kNone;
399  }
400 }
401 
402 void LHEReader::XMLHandler::characters(const XMLCh *const data_,
403  const XMLSize_t length)
404 {
405  if (mode == kHeader) {
406  DOMText *text = xmlHeader->createTextNode(data_);
407  xmlNodes.back()->appendChild(text);
408  return;
409  }
410 
411  if (XMLSimpleStr::isAllSpaces(data_, length))
412  return;
413 
414  unsigned int offset = 0;
415  while(offset < length && XMLSimpleStr::isSpace(data_[offset]))
416  offset++;
417 
418  if( mode == kEvent ) {
419  if (!skipEvent)
420  {
421  DOMText *text = xmlEvent->createTextNode(data_+offset);
422  xmlEventNodes.back()->appendChild(text);
423  }
424  return;
425  }
426 
427  if (mode == kNone)
428  throw cms::Exception("InvalidFormat")
429  << "LHE file has invalid format" << std::endl;
430 
431  XMLSimpleStr data(data_ + offset);
432  buffer.append(data);
433 }
434 
435 void LHEReader::XMLHandler::comment(const XMLCh *const data_,
436  const XMLSize_t length)
437 {
438  if (mode == kHeader) {
439  DOMComment *comment = xmlHeader->createComment(data_);
440  xmlNodes.back()->appendChild(comment);
441  return;
442  }
443 
444  XMLSimpleStr data(data_);
445 
447  fillHeader(header, data);
448  headers.push_back(header);
449 }
450 
452  fileURLs(params.getUntrackedParameter< std::vector<std::string> >("fileNames")),
453  strName(""),
454  firstEvent(params.getUntrackedParameter<unsigned int>("skipEvents", 0)),
455  maxEvents(params.getUntrackedParameter<int>("limitEvents", -1)),
456  curIndex(0), handler(new XMLHandler())
457 {
458 }
459 
460 LHEReader::LHEReader(const std::vector<std::string> &fileNames,
461  unsigned int firstEvent) :
462  fileURLs(fileNames), strName(""), firstEvent(firstEvent), maxEvents(-1),
463  curIndex(0), handler(new XMLHandler())
464 {
465 }
466 
468  unsigned int firstEvent) :
469  strName(inputs), firstEvent(firstEvent), maxEvents(-1),
470  curIndex(0), handler(new XMLHandler())
471 {
472 }
473 
475 {
476  // Explicitly release "orphaned" resources
477  // that were created through DOM implementation
478  // createXXXX factory method *before* last
479  // XMLPlatformUtils::Terminate is called.
480  handler.release();
481  curDoc.release();
482  curSource.release();
483 }
484 
485  boost::shared_ptr<LHEEvent> LHEReader::next(bool* newFileOpened)
486  {
487  while(curDoc.get() || curIndex < fileURLs.size() || (fileURLs.size() == 0 && strName != "" ) ) {
488  if (!curDoc.get()) {
489  if ( fileURLs.size() > 0 ) {
490  logFileAction(" Initiating request to open LHE file ", fileURLs[curIndex]);
491  curSource.reset(new FileSource(fileURLs[curIndex]));
492  logFileAction(" Successfully opened LHE file ", fileURLs[curIndex]);
493  if ( newFileOpened != nullptr ) *newFileOpened = true;
494  ++curIndex;
495  } else if ( strName != "" ) {
496  curSource.reset(new StringSource(strName));
497  }
498  handler->reset();
499  curDoc.reset(curSource->createReader(*handler));
500  curRunInfo.reset();
501  }
502  handler->skipEvent = firstEvent > 0;
503 
504  XMLHandler::Object event = handler->gotObject;
505  handler->gotObject = XMLHandler::kNone;
506 
507 
508  switch(event) {
509  case XMLHandler::kNone:
510  if (!curDoc->parse()) {
511  curDoc.reset();
512  logFileAction(" Closed LHE file ", fileURLs[curIndex - 1]);
513  return boost::shared_ptr<LHEEvent>();
514  }
515  break;
516 
517  case XMLHandler::kHeader:
518  break;
519 
520  case XMLHandler::kInit:
521  {
522  std::istringstream data;
523  data.str(handler->buffer);
524  handler->buffer.clear();
525 
526  curRunInfo.reset(new LHERunInfo(data));
527 
528  std::for_each(handler->headers.begin(),
529  handler->headers.end(),
530  boost::bind(&LHERunInfo::addHeader,
531  curRunInfo.get(), _1));
532  handler->headers.clear();
533  }
534  break;
535 
537  break;
538 
539  case XMLHandler::kEvent:
540  {
541  if (!curRunInfo.get())
542  throw cms::Exception("InvalidState")
543  << "Got LHE event without"
544  " initialization." << std::endl;
545 
546  if (firstEvent > 0) {
547  firstEvent--;
548  continue;
549  }
550 
551  if (maxEvents == 0)
552  return boost::shared_ptr<LHEEvent>();
553  else if (maxEvents > 0)
554  maxEvents--;
555 
556  std::istringstream data;
557  data.str(handler->buffer);
558  handler->buffer.clear();
559 
560  boost::shared_ptr<LHEEvent> lheevent;
561  lheevent.reset(new LHEEvent(curRunInfo, data));
562  const XMLHandler::wgt_info& info = handler->weightsinevent;
563  for( size_t i=0; i< info.size(); ++i ) {
564  double num = -1.0;
565  sscanf(info[i].second.c_str(),"%le",&num);
566  lheevent->addWeight(gen::WeightsInfo(info[i].first,num));
567  }
568  lheevent->setNpLO(handler->npLO);
569  lheevent->setNpNLO(handler->npNLO);
570  //fill scales
571  if (handler->scales.size()>0) {
572  lheevent->setScales(handler->scales);
573  }
574  return lheevent;
575  }
576  }
577  }
578 
579  return boost::shared_ptr<LHEEvent>();
580  }
581 
582 } // namespace lhef
583 
type
Definition: HCALResponse.h:21
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:69
static const TGPicture * info(bool iBackgroundIsBlack)
LHEReader(const edm::ParameterSet &params)
Definition: LHEReader.cc:451
static void logFileAction(char const *msg, std::string const &fileName)
Definition: LHEReader.cc:37
static void fillHeader(LHERunInfo::Header &header, const char *data, int len=-1)
Definition: LHEReader.cc:163
void FlushMessageLog()
StringSource(const std::string &inputs)
Definition: LHEReader.cc:78
void endElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override
Definition: LHEReader.cc:290
const wgt_info & weightInfo() const
Definition: LHEReader.cc:119
const std::vector< std::string > fileURLs
Definition: LHEReader.h:33
static bool isSpace(XMLCh ch)
Definition: XMLUtils.h:87
virtual void characters(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:402
Definition: weight.py:1
#define nullptr
std::vector< LHERunInfo::Header > headers
Definition: LHEReader.cc:146
U second(std::pair< T, U > const &p)
boost::shared_ptr< LHERunInfo > curRunInfo
Definition: LHEReader.h:44
static const StorageFactory * get(void)
void addHeader(const Header &header)
Definition: LHERunInfo.h:63
void addLine(const std::string &line)
static bool isAllSpaces(const XMLCh *str, unsigned int length)
Definition: XMLUtils.h:83
const std::string strName
Definition: LHEReader.h:36
std::vector< std::pair< std::string, std::string > > wgt_info
Definition: LHEReader.cc:100
#define end
Definition: vmac.h:37
Definition: value.py:1
std::unique_ptr< DOMImplementation > impl
Definition: LHEReader.cc:138
std::unique_ptr< XMLDocument > curDoc
Definition: LHEReader.h:43
std::vector< DOMElement * > xmlNodes
Definition: LHEReader.cc:144
def elem(elemtype, innerHTML='', html_class='', kwargs)
Definition: HTMLExport.py:18
void startElement(const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname, const Attributes &attributes) override
Definition: LHEReader.cc:180
std::unique_ptr< Source > curSource
Definition: LHEReader.h:42
boost::shared_ptr< LHEEvent > next(bool *newFileOpened=nullptr)
Definition: LHEReader.cc:485
XMLDocument * createReader(XMLDocument::Handler &handler) override
Definition: LHEReader.cc:91
std::auto_ptr< StorageWrap > fileStream
Definition: LHEReader.cc:73
char data[epos_bytes_allocation]
Definition: EPOS_Wrapper.h:82
virtual void comment(const XMLCh *const chars, const XMLSize_t length) override
Definition: LHEReader.cc:435
virtual XMLDocument * createReader(XMLDocument::Handler &handler)=0
unsigned int firstEvent
Definition: LHEReader.h:37
std::vector< float > scales
Definition: LHEReader.cc:150
std::unique_ptr< XMLHandler > handler
Definition: LHEReader.h:45
std::auto_ptr< std::istream > fileStream
Definition: LHEReader.cc:95
FileSource(const std::string &fileURL)
Definition: LHEReader.cc:52
static void attributesToDom(DOMElement *dom, const Attributes &attributes)
Definition: LHEReader.cc:153
unsigned int curIndex
Definition: LHEReader.h:39
def move(src, dest)
Definition: eostools.py:510
Definition: event.py:1
std::unique_ptr< Storage > open(const std::string &url, int mode=IOFlags::OpenRead) const
#define comment(par)
Definition: vmac.h:161