CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_6_2_5/src/GeneratorInterface/LHEInterface/src/XMLUtils.cc

Go to the documentation of this file.
00001 #include <iostream>
00002 #include <memory>
00003 #include <string>
00004 #include <cstring>
00005 
00006 #include <xercesc/util/PlatformUtils.hpp>
00007 #include <xercesc/util/XMLString.hpp>
00008 #include <xercesc/util/XMLUni.hpp>
00009 #include <xercesc/sax2/SAX2XMLReader.hpp>
00010 #include <xercesc/sax2/XMLReaderFactory.hpp>
00011 
00012 #include "FWCore/Utilities/interface/Exception.h"
00013 
00014 #include "Utilities/StorageFactory/interface/IOTypes.h"
00015 #include "Utilities/StorageFactory/interface/Storage.h"
00016 
00017 #include "XMLUtils.h"
00018 
00019 XERCES_CPP_NAMESPACE_USE
00020 #define BUF_SIZE 8192 
00021 
00022 namespace lhef {
00023 
00024 StorageWrap::StorageWrap(Storage *storage) :
00025         storage(storage)
00026 {
00027 }
00028 
00029 StorageWrap::~StorageWrap()
00030 {
00031         storage->close();
00032 }
00033 
00034 unsigned int XMLDocument::XercesPlatform::instances = 0;
00035 
00036 XMLDocument::XercesPlatform::XercesPlatform()
00037 {
00038         if (!instances++) {
00039                 try {
00040                         XMLPlatformUtils::Initialize();
00041                 } catch(const XMLException &e) {
00042                         throw cms::Exception("XMLDocument")
00043                                 << "XMLPlatformUtils::Initialize failed "
00044                                    "because of: "
00045                                 << XMLSimpleStr(e.getMessage()) << std::endl;
00046                 }
00047         }
00048 }
00049 
00050 XMLDocument::XercesPlatform::~XercesPlatform()
00051 {
00052         if (!--instances)
00053                 XMLPlatformUtils::Terminate();
00054 }
00055 
00056 XMLDocument::XMLDocument(std::auto_ptr<std::istream> &in, Handler &handler) :
00057         platform(new XercesPlatform()),
00058         source(new STLInputSource(in)),
00059         parser(XMLReaderFactory::createXMLReader()),
00060         done(false)
00061 {
00062         init(handler);
00063 }
00064 
00065 XMLDocument::XMLDocument(std::auto_ptr<StorageWrap> &in, Handler &handler) :
00066         platform(new XercesPlatform()),
00067         source(new StorageInputSource(in)),
00068         parser(XMLReaderFactory::createXMLReader()),
00069         done(false)
00070 {
00071         init(handler);
00072 }
00073 
00074 void XMLDocument::init(Handler &handler)
00075 {
00076         try {
00077                 parser->setFeature(XMLUni::fgSAX2CoreValidation, false);
00078                 parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, false);
00079                 parser->setFeature(XMLUni::fgXercesSchema, false);
00080                 parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
00081 
00082                 parser->setContentHandler(&handler);
00083                 parser->setLexicalHandler(&handler);
00084                 parser->setErrorHandler(&handler);
00085 
00086                 if (!parser->parseFirst(*source, token))
00087                         throw cms::Exception("XMLParseError")
00088                                 << "SAXParser::parseFirst failed" << std::endl;
00089         } catch(const XMLException &e) {
00090                 throw cms::Exception("XMLDocument")
00091                         << "XMLPlatformUtils::Initialize failed because of "
00092                         << XMLSimpleStr(e.getMessage()) << std::endl;
00093         } catch(const SAXException &e) {
00094                 throw cms::Exception("XMLDocument")
00095                         << "XML parser reported: "
00096                         << XMLSimpleStr(e.getMessage()) << "." << std::endl;  
00097         }
00098 }
00099 
00100 XMLDocument::~XMLDocument()
00101 {
00102 }
00103 
00104 bool XMLDocument::parse()
00105 {
00106         try {
00107                 if (done || parser->getErrorCount())
00108                         return false;
00109 
00110                 done = !parser->parseNext(token);
00111         } catch(const XMLException &e) {
00112                 throw cms::Exception("XMLDocument")
00113                         << "XMLPlatformUtils::Initialize failed because of "
00114                         << XMLSimpleStr(e.getMessage()) << std::endl;
00115         } catch(const SAXException &e) {
00116                 throw cms::Exception("XMLDocument")
00117                         << "XML parser reported: "
00118                         << XMLSimpleStr(e.getMessage()) << "." << std::endl;  
00119         }
00120 
00121         return !done;
00122 }
00123 
00124 CBInputStream::Reader::~Reader()
00125 {
00126 }
00127 
00128 CBInputStream::CBInputStream(Reader &reader) :
00129         reader(reader)
00130 {
00131 }
00132 
00133 CBInputStream::~CBInputStream()
00134 {
00135 }
00136 
00137 unsigned int CBInputStream::readBytes(XMLByte* const buf,
00138                                       const unsigned int size)
00139 {
00140         char *rawBuf = reinterpret_cast<char*>(buf);
00141         unsigned int bytes = size * sizeof(XMLByte);
00142         unsigned int read = 0;
00143 
00144         while(read < bytes) {
00145                 if (buffer.empty()) {
00146                         buffer = reader.data();
00147                         if (buffer.empty())
00148                                 break;
00149                 }
00150 
00151                 unsigned int len = buffer.length();
00152                 unsigned int rem = bytes - read;
00153                 if (rem < len) {
00154                         std::memcpy(rawBuf + read, buffer.c_str(), rem);
00155                         buffer.erase(0, rem);
00156                         read += rem;
00157                         break;
00158                 }
00159 
00160                 std::memcpy(rawBuf + read, buffer.c_str(), len);
00161                 buffer.clear();
00162                 read += len;
00163         }
00164 
00165         read /= sizeof(XMLByte);
00166         pos += read;
00167 
00168         return read;
00169 }
00170 
00171 STLInputStream::STLInputStream(std::istream &in) :
00172         in(in)
00173 {
00174         if (in.bad())
00175                 throw cms::Exception("FileStreamError")
00176                         << "I/O stream bad in STLInputStream::STLInputStream()"
00177                         << std::endl;
00178 }
00179 
00180 STLInputStream::~STLInputStream()
00181 {
00182 }
00183 
00184 unsigned int STLInputStream::readBytes(XMLByte* const buf,
00185                                        const unsigned int size)
00186 {
00187         char *rawBuf = reinterpret_cast<char*>(buf);
00188         unsigned int bytes = size * sizeof(XMLByte);
00189         in.read(rawBuf, bytes);
00190         unsigned int readBytes = in.gcount();
00191 
00192         if (in.bad())
00193                 throw cms::Exception("FileStreamError")
00194                         << "I/O stream bad in STLInputStream::readBytes()"
00195                         << std::endl;
00196 
00197         unsigned int read = (unsigned int)(readBytes / sizeof(XMLByte));
00198         unsigned int rest = (unsigned int)(readBytes % sizeof(XMLByte));
00199         for(unsigned int i = 1; i <= rest; i++)
00200                 in.putback(rawBuf[readBytes - i]);
00201 
00202         pos += read;
00203         return read;
00204 }
00205 
00206 StorageInputStream::StorageInputStream(StorageWrap &in) :
00207         in(in),
00208         lstr(LZMA_STREAM_INIT),
00209         compression_(false),
00210         lasttotal_(0)
00211 {
00212   // Check the kind of file.
00213   char header[6];
00214   /*unsigned int s = */ in->read(header, 6);
00215   in->position(0, Storage::SET);
00216   // Let's use lzma to start with.
00217   if (header[1] == '7'
00218       && header[2] == 'z'
00219       && header[3] == 'X'
00220       && header[4] == 'Z')
00221   {
00222     compression_ = true;
00223     lstr = LZMA_STREAM_INIT;
00224     // We store the beginning of the outBuffer to make sure
00225     // we can always update previous results.
00226 
00227 #if LZMA_VERSION <= UINT32_C(49990030)
00228     int ret = lzma_auto_decoder(&lstr, NULL, NULL);
00229 #else
00230     int ret = lzma_auto_decoder(&lstr, -1, 0);
00231 #endif
00232 
00233     if (ret != LZMA_OK)
00234     {
00235       lzma_end(&lstr);
00236       throw cms::Exception("IO") << "Error while reading compressed LHE file";
00237     }
00238   }
00239 }
00240 
00241 StorageInputStream::~StorageInputStream()
00242 {
00243   lzma_end(&(lstr));
00244 }
00245 
00246 unsigned int StorageInputStream::readBytes(XMLByte* const buf,
00247                                            const unsigned int size)
00248 {
00249   // Simple read-in write-out in case
00250   if (!compression_)
00251   {
00252     void *rawBuf = reinterpret_cast<void*>(buf);
00253     unsigned int bytes = size * sizeof(XMLByte);
00254     unsigned int readBytes = in->read(rawBuf, bytes);
00255 
00256     unsigned int read = (unsigned int)(readBytes / sizeof(XMLByte));
00257     unsigned int rest = (unsigned int)(readBytes % sizeof(XMLByte));
00258     if (rest)
00259       in->position(-(IOOffset)rest, Storage::CURRENT);
00260 
00261     /*for (unsigned int i = 0; i < read; ++i){
00262       std::cout << buf[i] ;
00263     }*/
00264  
00265     pos += read;
00266     return read;
00267   }
00268   // Compressed case. 
00269   // We simply read as many bytes as we can and we 
00270   // uncompress them in the output buffer.
00271   // We never decompress more bytes we were asked by
00272   // xerces. 
00273   // In case we read from file more bytes than needed 
00274   // we simply rollback by the (hopefully) correct amount.
00275   // If we don't read enough bytes, we simply return
00276   // the amount of bytes read and we wait for being called
00277   // again by xerces.
00278   unsigned int bytes = size * sizeof(XMLByte);
00279 //  std::cout << "bites " << bytes << std::endl;
00280   uint8_t inBuf[BUF_SIZE];
00281   unsigned int rd = in->read((void*)inBuf, BUF_SIZE);
00282   lstr.next_in = inBuf;
00283   lstr.avail_in = rd;
00284   lstr.next_out = buf;
00285   lstr.avail_out = bytes;
00286 /*  for (unsigned int i = 0; i < size; ++i){
00287     std::cout << buf[i] ;
00288   }  
00289   std::cout << std::endl;*/
00290 
00291   int ret = lzma_code(&lstr, LZMA_RUN);
00292   if(ret != LZMA_OK && ret != LZMA_STREAM_END) {  /* decompression error */
00293     lzma_end(&lstr);
00294     throw cms::Exception("IO") << "Error while reading compressed LHE file";
00295   }
00296   // If we did not consume everything we put it back.
00297 //  std::cout << "lstr.avail_in " << lstr.avail_in << " lstr.total_in " << lstr.total_in << std::endl;
00298 //  std::cout << "lstr.avail_out " << lstr.avail_out << " lstr.total_out " << lstr.total_out << std::endl;
00299   if (lstr.avail_in){
00300     std::cout << "rolling back" << std::endl;
00301     in->position(-(IOOffset)(lstr.avail_in), Storage::CURRENT);    
00302   }  
00303   pos = lstr.total_out;
00304   unsigned int read = lstr.total_out - lasttotal_;
00305   lasttotal_ = lstr.total_out;
00306   return read;
00307 }
00308 
00309 } // namespace lhef