CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_0/src/DataFormats/Common/interface/Trie.h

Go to the documentation of this file.
00001 #ifndef DataFormats_Common_Trie_h
00002 #define DataFormats_Common_Trie_h
00003 /*
00004 ** 
00005 ** 
00006 ** Copyright (C) 2006 Julien Lemoine
00007 ** This program is free software; you can redistribute it and/or modify
00008 ** it under the terms of the GNU Lesser General Public License as published by
00009 ** the Free Software Foundation; either version 2 of the License, or
00010 ** (at your option) any later version.
00011 ** 
00012 ** This program is distributed in the hope that it will be useful,
00013 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 ** GNU Lesser General Public License for more details.
00016 ** 
00017 ** You should have received a copy of the GNU Lesser General Public License
00018 ** along with this program; if not, write to the Free Software
00019 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 **
00021 **
00022 **   modified by Vincenzo Innocente on 15/08/2007
00023 **
00024 */
00025 
00026 
00027 #include <list>
00028 #include <string>
00029 
00030 namespace edm
00031 {
00032   // fwd declaration
00033   template <typename T>
00034   class TrieNode;
00035 
00040   template <typename T>
00041   class TrieFactory
00042     {
00043     public:
00044       TrieFactory(unsigned paquetSize);
00045       ~TrieFactory();
00046 
00047     private:
00049       TrieFactory();
00051       TrieFactory(const TrieFactory &e);
00053       TrieFactory& operator=(const TrieFactory &e);
00054 
00055     public:
00056       TrieNode<T>* newNode(const T &value);
00057       void clear();
00058 
00059     private:
00060       unsigned                  _paquetSize;
00061       std::list<TrieNode<T>*>   _allocatedNodes;
00062       TrieNode<T>               *_lastNodes;
00063       unsigned                  _nbUsedInLastNodes;
00064     };
00065 }
00066 
00067 
00068 namespace edm
00069 {
00070   template<typename T>
00071   class TrieNodeIter;
00072 
00073 
00079   template <typename T>
00080   class TrieNode
00081   {
00082   public:
00083 
00084     typedef TrieNodeIter<T> const_iterator;
00085 
00086     TrieNode();
00087     ~TrieNode();
00088 
00089   private:
00091     TrieNode(const TrieNode &e);
00093     TrieNode& operator=(const TrieNode &e);
00094 
00095   public:
00097     void setValue(const T &val);
00099     const T& value() const;
00100 
00102     const TrieNode<T>* brother() const;
00103     TrieNode<T>* brother();
00105     unsigned char brotherLabel() const;
00106 
00107 
00109     TrieNodeIter<T> begin() const;
00111     TrieNodeIter<T> end() const;
00112 
00113     // get first sub Node
00114     const TrieNode<T>* subNode() const;
00115     TrieNode<T>* subNode();
00116     unsigned char subNodeLabel() const;
00117 
00118     // Looking for a sub node
00119     const TrieNode<T>* subNodeByLabel(unsigned char chr) const;
00120     TrieNode<T>* subNodeByLabel(unsigned char chr);
00121 
00122     // add an edge
00123     void addSubNode(unsigned char chr, TrieNode<T> *node);
00124 
00126     void display(std::ostream &os, unsigned offset, unsigned char label) const;
00127 
00129     void clear();
00130 
00131   private:
00132     template <typename Node>
00133     Node _sequentialSearch(Node first, unsigned char label,
00134                            unsigned char val) const;
00136     void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
00138     void _addBrother(unsigned char chr, TrieNode<T> *brother);
00143     const TrieNode<T>* _getBrother(unsigned char chr) const;
00148     TrieNode<T>* _getBrother(unsigned char chr);
00149 
00150   private:
00152     TrieNode<T>         *_brother;
00154     unsigned char       _brotherLabel;
00156     TrieNode<T>         *_firstSubNode;
00158     unsigned char       _firstSubNodeLabel;
00160     T                   _value;
00161   };
00162 }
00163 
00164 #include <ostream>
00165 
00166 namespace edm
00167 {
00168   //fwd declaration
00169   template <typename T>
00170     class TrieFactory;
00171   template <typename T>
00172     class TrieNode;
00173 
00178   template <typename T>
00179     class Trie
00180     {
00181     public:
00184       Trie(const T &empty);
00185       ~Trie();
00186 
00187     private:
00189       Trie();
00191       Trie(const Trie &e);
00193       Trie& operator=(const Trie &e);
00194     
00195     public:
00198       void insert(std::string const & str, const T &value);
00199       void insert(const char *str, unsigned strLen, const T &value);
00202       void setEntry(std::string const & str, const T &value);
00203       void setEntry(const char *str, unsigned strLen, const T &value);
00205       const T& find(std::string const & str) const;
00206       const T& find(const char *str, unsigned strLen) const;
00208       const TrieNode<T>* node(std::string const & str) const;
00209       const TrieNode<T>* node(const char *str, unsigned strLen) const;
00211       const TrieNode<T>* initialNode() const;
00213       void display(std::ostream &os);
00215       void clear();
00216 
00217     private:
00218       TrieNode<T>* _addEntry(const char *str, unsigned strLen);
00219 
00220     private:
00222       T                 _empty;
00224       TrieFactory<T>    *_factory;
00226       TrieNode<T>               *_initialNode;
00227     };
00228 }
00229 
00230 
00231 
00232 #include <boost/iterator/iterator_facade.hpp>
00233 
00234 #include<string>
00235 #include<ostream>
00236 
00237 
00238 // iterators and visitors
00239 
00240 namespace edm{
00241 
00242   template<typename T>
00243   class TrieNodeIter
00244     : public boost::iterator_facade<TrieNodeIter<T>,
00245                                     TrieNode<T> const, 
00246                                     boost::forward_traversal_tag >
00247   {
00248     
00249   public:
00250     typedef TrieNodeIter<T> self;
00251     typedef TrieNode<T> const node_base;
00252     TrieNodeIter()
00253       : m_node(0), m_label(0)
00254     {}
00255     
00256     explicit TrieNodeIter(node_base* p)
00257       : m_node(p ? p->subNode() : 0), 
00258         m_label(p ? p->subNodeLabel() : 0)
00259     {}
00260     
00261     unsigned char label() const { return m_label;}
00262   private:
00263     friend class boost::iterator_core_access;
00264     
00265     void increment() {
00266       m_label = m_node->brotherLabel();
00267       m_node = m_node->brother(); 
00268     }
00269     
00270     bool equal(self const& other) const
00271     {
00272       return this->m_node == other.m_node;
00273     }
00274     
00275     node_base& dereference() const { return *m_node; }
00276     
00277     node_base* m_node;
00278     unsigned char m_label;
00279   };
00280   
00281   
00283   template<typename V, typename T>
00284   void walkTrie(V & v,  TrieNode<T> const  & n, std::string const & label="") {
00285     typedef TrieNode<T> const node_base;
00286     typedef TrieNodeIter<T> node_iterator;
00287     node_iterator e;
00288     for (node_iterator p(&n); p!=e; ++p) {
00289       v((*p).value(),label+(char)p.label());
00290       walkTrie(v,*p,label+(char)p.label());
00291     }
00292   }
00293    
00294 
00295 
00297   template<typename V, typename T>
00298   bool iterateTrieLeaves(V & v,  TrieNode<T> const  & n, std::string const & label="") {
00299     typedef TrieNode<T> const node_base;
00300     typedef TrieNodeIter<T> node_iterator;
00301     node_iterator e;
00302     node_iterator p(&n);
00303     if (p==e) return true;
00304     for (; p!=e; ++p) {
00305       if (iterateTrieLeaves(v,*p,label+(char)p.label()) )
00306         v((*p).value(),label+(char)p.label());
00307     }
00308     return false;
00309   }
00310 
00311 
00312 }
00313 
00314 //
00315 //----------------------------------------------------------------
00316 //
00317 // implementations
00318 
00319 
00320 template <typename T>
00321 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize) :
00322   _paquetSize(paquetSize), _lastNodes(0x0), _nbUsedInLastNodes(0)
00323 {
00324   _lastNodes = new TrieNode<T>[paquetSize];
00325 }
00326 
00327 template <typename T>
00328 edm::TrieFactory<T>::~TrieFactory()
00329 {
00330   typename std::list<TrieNode<T>*>::const_iterator it;
00331 
00332   for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00333     delete[] *it;
00334   if (_lastNodes)
00335     delete[] _lastNodes;
00336 }
00337 
00338 template <typename T>
00339 edm::TrieNode<T>* edm::TrieFactory<T>::newNode(const T &value)
00340 {
00341   if (_nbUsedInLastNodes == _paquetSize)
00342     {
00343       _allocatedNodes.push_back(_lastNodes);
00344       _nbUsedInLastNodes = 0;
00345       _lastNodes = new TrieNode<T>[_paquetSize];
00346     }
00347   TrieNode<T> *res = &_lastNodes[_nbUsedInLastNodes];
00348   ++_nbUsedInLastNodes;
00349   res->setValue(value);
00350   res->clear();
00351   return res;
00352 }
00353 
00354 template <typename T>
00355 void edm::TrieFactory<T>::clear()
00356 {
00357   typename std::list<TrieNode<T>*>::const_iterator it;
00358   for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00359     delete[] *it;
00360   _allocatedNodes.clear();
00361   _nbUsedInLastNodes = 0;
00362 }
00363 
00364 
00365 template <typename T>
00366 edm::TrieNode<T>::TrieNode() :
00367   _brother(0), _brotherLabel(0), _firstSubNode(0), _firstSubNodeLabel(0),_value()
00370 {
00371 }
00372 
00373 template <typename T>
00374 edm::TrieNode<T>::~TrieNode()
00375 {
00376   // do not delete _brother and _firstSubNode because they are
00377   // allocated by factory (TrieFactory) and factory will delete them
00378 }
00379 
00380 
00381 template <typename T>   
00382 edm::TrieNodeIter<T> 
00383 edm::TrieNode<T>::begin() const {
00384   return const_iterator(this);
00385 }
00386 template <typename T>   
00387 edm::TrieNodeIter<T> 
00388 edm::TrieNode<T>::end() const {
00389   return const_iterator(0);
00390 }
00391 
00392 template <typename T>
00393 void edm::TrieNode<T>::setValue(const T &val)
00394 {
00395   _value = val;
00396 }
00397 
00398 template <typename T>
00399 const T& edm::TrieNode<T>::value() const
00400 {
00401   return _value;
00402 }
00403 
00404 template <typename T>
00405 const edm::TrieNode<T>* edm::TrieNode<T>::brother() const
00406 {
00407   return _brother;
00408 }
00409 
00410 template <typename T>
00411 edm::TrieNode<T>* edm::TrieNode<T>::brother()
00412 {
00413   return _brother;
00414 }
00415 
00416 template <typename T>
00417 const edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr) const
00418 {
00419   const TrieNode<T> *brother = _brother;
00420   return _sequentialSearch(brother, _brotherLabel, chr);
00421 }
00422 
00423 template <typename T>
00424 edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr)
00425 {
00426   return _sequentialSearch(_brother, _brotherLabel, chr);
00427 }
00428 
00429 template <typename T>
00430 void edm::TrieNode<T>::_addBrother(unsigned char chr, TrieNode<T> *brother)
00431 {
00432   if (!_brother || _brotherLabel > chr)
00433     {
00434       brother->_setBrother(_brother, _brotherLabel);
00435       _brother = brother;
00436       _brotherLabel = chr;
00437     }
00438   else
00439     _brother->_addBrother(chr, brother);
00440 }
00441 
00442 template <typename T>
00443 unsigned char edm::TrieNode<T>::brotherLabel() const
00444 {
00445   return _brotherLabel;
00446 }
00447 
00448 template <typename T>
00449 const edm::TrieNode<T>* edm::TrieNode<T>::subNode() const
00450 {
00451   return _firstSubNode;
00452 }
00453 
00454 template <typename T>
00455 edm::TrieNode<T>* edm::TrieNode<T>::subNode()
00456 {
00457   return _firstSubNode;
00458 }
00459 
00460 template <typename T>
00461 unsigned char edm::TrieNode<T>::subNodeLabel() const
00462 {
00463   return _firstSubNodeLabel;
00464 }
00465 
00466 template <typename T>
00467 const edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const
00468 {
00469   const TrieNode<T> *first = _firstSubNode;
00470   return _sequentialSearch(first, _firstSubNodeLabel, chr);
00471 }
00472 
00473 template <typename T>
00474 edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr)
00475 {
00476   return _sequentialSearch(_firstSubNode, _firstSubNodeLabel, chr);
00477 }
00478 
00479 template <typename T>
00480 void edm::TrieNode<T>::addSubNode(unsigned char chr, TrieNode<T> *node)
00481 {
00482   if (!_firstSubNode || _firstSubNodeLabel > chr)
00483     {
00484       node->_setBrother(_firstSubNode, _firstSubNodeLabel);
00485       _firstSubNode = node;
00486       _firstSubNodeLabel = chr;
00487     }
00488   else
00489     _firstSubNode->_addBrother(chr, node);
00490 }
00491 
00492 template <typename T>
00493 template <typename Node>
00494 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const
00495 {
00496   if (first && label <= val)
00497     {
00498       if (label == val)
00499         return first;
00500       return first->_getBrother(val);
00501     }
00502   return 0x0;
00503 }
00504 
00505 template <typename T>
00506 void edm::TrieNode<T>::_setBrother(TrieNode<T> *brother, unsigned char brotherLabel)
00507 {
00508   _brother = brother;
00509   _brotherLabel = brotherLabel;
00510 }
00511 
00512 template <typename T>
00513 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const
00514 {
00515   unsigned int i;
00516   for (i = 0; i < offset; ++i)
00517     os << " ";
00518   if (label)
00519     os << "label[" << label << "] ";
00520   os << "value[" << _value << "]" << std::endl;
00521   if (_firstSubNode)
00522     _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
00523   if (_brother)
00524     _brother->display(os, offset, _brotherLabel);
00525 }
00526 
00527 template <typename T>
00528 void edm::TrieNode<T>::clear()
00529 {
00530   _brother = 0x0;
00531   _brotherLabel = 0;
00532   _firstSubNode = 0x0;
00533   _firstSubNodeLabel = 0;
00534 }
00535 
00536 
00537 #include <vector>
00538 #include <algorithm>
00539 #include <string>
00540 #include <cassert>
00541 
00542 #include "FWCore/Utilities/interface/EDMException.h"
00543 
00544 
00545 namespace edm {
00546   namespace detailsTrie  {
00547     inline void errorInsert(std::string const & key) {
00548       Exception::throwThis(errors::InvalidReference,
00549         "Trie::insert called with a key already in collection;\n"
00550         "key value: ",
00551         key.c_str());
00552     }
00553   }
00554 }
00555 
00556 template <typename T>
00557 edm::Trie<T>::Trie(const T &empty) :
00558   _empty(empty), _factory(0x0), _initialNode(0x0)
00559 {
00560   // initialize nodes by paquets of 10000
00561   _factory = new TrieFactory<T>(10000);
00562   _initialNode = _factory->newNode(_empty);
00563 }
00564 
00565 template <typename T>
00566 edm::Trie<T>::~Trie()
00567 {
00568   delete _factory;
00569 }
00570 
00571 template <typename T>
00572 void edm::Trie<T>::setEntry(std::string const & str, const T &value)
00573 {
00574   setEntry(str.c_str(),str.size(),value);
00575 }
00576 template <typename T>
00577 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value)
00578 {
00579   TrieNode<T>   *node = _addEntry(str, strLen);
00580   node->setValue(value);
00581 }
00582 
00583 template <typename T>
00584 edm::TrieNode<T>* edm::Trie<T>::_addEntry(const char *str, unsigned strLen)
00585 {
00586   unsigned      pos = 0;
00587   bool          found = true;
00588   TrieNode<T>   *node = _initialNode, *previous = 0x0;
00589 
00590   // Look for the part of the word which is in Trie
00591   while (found && pos < strLen)
00592     {
00593       found = false;
00594       previous = node;
00595       node = node->subNodeByLabel(str[pos]);
00596       if (node)
00597         {
00598           found = true;
00599           ++pos;
00600         }
00601     }
00602 
00603   // Add part of the word which is not in Trie
00604   if (!node || pos != strLen)
00605     {
00606       node = previous;
00607       for (unsigned i = pos; i < strLen; ++i)
00608         {
00609           TrieNode<T> *newNode = _factory->newNode(_empty);
00610           node->addSubNode(str[pos], newNode);
00611           node = newNode;
00612           ++pos;
00613         }
00614     }
00615   assert(node != 0x0);
00616   return node;
00617 }
00618 
00619 
00620 template <typename T>
00621 void edm::Trie<T>::insert(std::string const & str, const T &value)
00622 {
00623   insert(str.c_str(),str.size(),value);
00624 }
00625 
00626 template <typename T>
00627 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value)
00628 {
00629   TrieNode<T>   *node = _addEntry(str, strLen);
00630   
00631   // Set the value on the last node
00632   if (node && node->value() != _empty)
00633     detailsTrie::errorInsert(std::string(str,strLen));
00634   node->setValue(value);
00635 }
00636 
00637 template <typename T>
00638 const T& edm::Trie<T>::find(std::string const & str) const {
00639   return find(str.c_str(),str.size());
00640 }
00641 
00642 template <typename T>
00643 const T& edm::Trie<T>::find(const char *str, unsigned strLen) const
00644 {
00645   unsigned              pos = 0;
00646   bool                  found = true;
00647   const TrieNode<T>     *node = _initialNode;
00648   
00649   while (found && pos < strLen)
00650     {
00651       found = false;
00652       node = node->subNodeByLabel(str[pos]);
00653       if (node)
00654         {
00655           found = true;
00656           ++pos;
00657         }
00658     }
00659   if (node && pos == strLen) // The word is complet in the automaton
00660     return node->value();
00661   return _empty;
00662 }
00663 
00664 template <typename T>
00665 edm::TrieNode<T> const * 
00666 edm::Trie<T>::node(std::string const & str) const {
00667     return node(str.c_str(),str.size());
00668   }
00669 
00670 template <typename T>
00671 edm::TrieNode<T> const * 
00672 edm::Trie<T>::node(const char *str, unsigned strLen) const {
00673   unsigned              pos = 0;
00674   bool                  found = true;
00675   const TrieNode<T>     *node = _initialNode;
00676         
00677   while (found && pos < strLen)
00678     {
00679       found = false;
00680       node = node->subNodeByLabel(str[pos]);
00681       if (node)
00682         {
00683           found = true;
00684           ++pos;
00685         }
00686     }
00687   return node;
00688 }
00689 
00690 
00691 template <typename T>
00692 const edm::TrieNode<T>* edm::Trie<T>::initialNode() const
00693 {
00694   return _initialNode;
00695 }
00696 
00697 template <typename T>
00698 void edm::Trie<T>::clear()
00699 {
00700   _factory->clear();
00701   _initialNode = _factory->newNode(_empty);
00702 }
00703 
00704 template <typename T>
00705 void edm::Trie<T>::display(std::ostream &os)
00706 {
00707   if (_initialNode)
00708     _initialNode->display(os, 0, 0);
00709 }
00710 
00711 #endif   //  DataFormats_Common_Trie_h