CMS 3D CMS Logo

Trie.h

Go to the documentation of this file.
00001 #ifndef DataFormats_Common_Trie_h
00002 #define DataFormats_Common_Trie_h
00003 /*
00004 ** 
00005 ** 
00006 ** Copyright (C) 2006 Julien Lemoine
00007 ** This program is free software; you can redistribute it and/or modify
00008 ** it under the terms of the GNU Lesser General Public License as published by
00009 ** the Free Software Foundation; either version 2 of the License, or
00010 ** (at your option) any later version.
00011 ** 
00012 ** This program is distributed in the hope that it will be useful,
00013 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 ** GNU Lesser General Public License for more details.
00016 ** 
00017 ** You should have received a copy of the GNU Lesser General Public License
00018 ** along with this program; if not, write to the Free Software
00019 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 **
00021 **
00022 **   modified by Vincenzo Innocente on 15/08/2007
00023 **
00024 */
00025 
00026 
00027 #include <list>
00028 #include <string>
00029 
00030 namespace edm
00031 {
00032   // fwd declaration
00033   template <typename T>
00034   class TrieNode;
00035 
00040   template <typename T>
00041   class TrieFactory
00042     {
00043     public:
00044       TrieFactory(unsigned paquetSize);
00045       ~TrieFactory();
00046 
00047     private:
00049       TrieFactory();
00051       TrieFactory(const TrieFactory &e);
00053       TrieFactory& operator=(const TrieFactory &e);
00054 
00055     public:
00056       TrieNode<T>* newNode(const T &value);
00057       void clear();
00058 
00059     private:
00060       unsigned                  _paquetSize;
00061       std::list<TrieNode<T>*>   _allocatedNodes;
00062       TrieNode<T>               *_lastNodes;
00063       unsigned                  _nbUsedInLastNodes;
00064     };
00065 }
00066 
00067 
00068 namespace edm
00069 {
00070   template<typename T>
00071   class TrieNodeIter;
00072 
00073 
00079   template <typename T>
00080   class TrieNode
00081   {
00082   public:
00083 
00084     typedef TrieNodeIter<T> const_iterator;
00085 
00086     TrieNode();
00087     ~TrieNode();
00088 
00089   private:
00091     TrieNode(const TrieNode &e);
00093     TrieNode& operator=(const TrieNode &e);
00094 
00095   public:
00097     void setValue(const T &val);
00099     const T& value() const;
00100 
00102     const TrieNode<T>* brother() const;
00103     TrieNode<T>* brother();
00105     unsigned char brotherLabel() const;
00106 
00107 
00109     TrieNodeIter<T> begin() const;
00111     TrieNodeIter<T> end() const;
00112 
00113     // get first sub Node
00114     const TrieNode<T>* subNode() const;
00115     TrieNode<T>* subNode();
00116     unsigned char subNodeLabel() const;
00117 
00118     // Looking for a sub node
00119     const TrieNode<T>* subNodeByLabel(unsigned char chr) const;
00120     TrieNode<T>* subNodeByLabel(unsigned char chr);
00121 
00122     // add an edge
00123     void addSubNode(unsigned char chr, TrieNode<T> *node);
00124 
00126     void display(std::ostream &os, unsigned offset, unsigned char label) const;
00127 
00129     void clear();
00130 
00131   private:
00132     template <typename Node>
00133     Node _sequentialSearch(Node first, unsigned char label,
00134                            unsigned char val) const;
00136     void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
00138     void _addBrother(unsigned char chr, TrieNode<T> *brother);
00143     const TrieNode<T>* _getBrother(unsigned char chr) const;
00148     TrieNode<T>* _getBrother(unsigned char chr);
00149 
00150   private:
00152     TrieNode<T>         *_brother;
00154     unsigned char       _brotherLabel;
00156     TrieNode<T>         *_firstSubNode;
00158     unsigned char       _firstSubNodeLabel;
00160     T                   _value;
00161   };
00162 }
00163 
00164 #include <ostream>
00165 
00166 namespace edm
00167 {
00168   //fwd declaration
00169   template <typename T>
00170     class TrieFactory;
00171   template <typename T>
00172     class TrieNode;
00173 
00178   template <typename T>
00179     class Trie
00180     {
00181     public:
00184       Trie(const T &empty);
00185       ~Trie();
00186 
00187     private:
00189       Trie();
00191       Trie(const Trie &e);
00193       Trie& operator=(const Trie &e);
00194     
00195     public:
00198       void insert(std::string const & str, const T &value);
00199       void insert(const char *str, unsigned strLen, const T &value);
00202       void setEntry(std::string const & str, const T &value);
00203       void setEntry(const char *str, unsigned strLen, const T &value);
00205       const T& find(std::string const & str) const;
00206       const T& find(const char *str, unsigned strLen) const;
00208       const TrieNode<T>* node(std::string const & str) const;
00209       const TrieNode<T>* node(const char *str, unsigned strLen) const;
00211       const TrieNode<T>* initialNode() const;
00213       void display(std::ostream &os);
00215       void clear();
00216 
00217     private:
00218       TrieNode<T>* _addEntry(const char *str, unsigned strLen);
00219 
00220     private:
00222       T                 _empty;
00224       TrieFactory<T>    *_factory;
00226       TrieNode<T>               *_initialNode;
00227     };
00228 }
00229 
00230 
00231 
00232 #include <boost/iterator/iterator_facade.hpp>
00233 
00234 #include<string>
00235 #include<ostream>
00236 
00237 
00238 // iterators and visitors
00239 
00240 namespace edm{
00241 
00242   template<typename T>
00243   class TrieNodeIter
00244     : public boost::iterator_facade<TrieNodeIter<T>,
00245                                     TrieNode<T> const, 
00246                                     boost::forward_traversal_tag >
00247   {
00248     
00249   public:
00250     typedef TrieNodeIter<T> self;
00251     typedef TrieNode<T> const node_base;
00252     TrieNodeIter()
00253       : m_node(0), m_label(0)
00254     {}
00255     
00256     explicit TrieNodeIter(node_base* p)
00257       : m_node(p ? p->subNode() : 0), 
00258         m_label(p ? p->subNodeLabel() : 0)
00259     {}
00260     
00261     unsigned char label() const { return m_label;}
00262   private:
00263     friend class boost::iterator_core_access;
00264     
00265     void increment() {
00266       m_label = m_node->brotherLabel();
00267       m_node = m_node->brother(); 
00268     }
00269     
00270     bool equal(self const& other) const
00271     {
00272       return this->m_node == other.m_node;
00273     }
00274     
00275     node_base& dereference() const { return *m_node; }
00276     
00277     node_base* m_node;
00278     unsigned char m_label;
00279   };
00280   
00281   
00283   template<typename V, typename T>
00284   void walkTrie(V & v,  TrieNode<T> const  & n, std::string const & label="") {
00285     typedef TrieNode<T> const node_base;
00286     typedef TrieNodeIter<T> node_iterator;
00287     node_iterator e;
00288     for (node_iterator p(&n); p!=e; ++p) {
00289       v((*p).value(),label+(char)p.label());
00290       walkTrie(v,*p,label+(char)p.label());
00291     }
00292   }
00293    
00294 
00295 
00297   template<typename V, typename T>
00298   bool iterateTrieLeaves(V & v,  TrieNode<T> const  & n, std::string const & label="") {
00299     typedef TrieNode<T> const node_base;
00300     typedef TrieNodeIter<T> node_iterator;
00301     node_iterator e;
00302     node_iterator p(&n);
00303     if (p==e) return true;
00304     for (; p!=e; ++p) {
00305       if (iterateTrieLeaves(v,*p,label+(char)p.label()) )
00306         v((*p).value(),label+(char)p.label());
00307     }
00308     return false;
00309   }
00310 
00311 
00312 }
00313 
00314 //
00315 //----------------------------------------------------------------
00316 //
00317 // implementations
00318 
00319 
00320 template <typename T>
00321 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize) :
00322   _paquetSize(paquetSize), _lastNodes(0x0), _nbUsedInLastNodes(0)
00323 {
00324   _lastNodes = new TrieNode<T>[paquetSize];
00325 }
00326 
00327 template <typename T>
00328 edm::TrieFactory<T>::~TrieFactory()
00329 {
00330   typename std::list<TrieNode<T>*>::const_iterator it;
00331 
00332   for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00333     delete[] *it;
00334   if (_lastNodes)
00335     delete[] _lastNodes;
00336 }
00337 
00338 template <typename T>
00339 edm::TrieNode<T>* edm::TrieFactory<T>::newNode(const T &value)
00340 {
00341   if (_nbUsedInLastNodes == _paquetSize)
00342     {
00343       _allocatedNodes.push_back(_lastNodes);
00344       _nbUsedInLastNodes = 0;
00345       _lastNodes = new TrieNode<T>[_paquetSize];
00346     }
00347   TrieNode<T> *res = &_lastNodes[_nbUsedInLastNodes];
00348   ++_nbUsedInLastNodes;
00349   res->setValue(value);
00350   res->clear();
00351   return res;
00352 }
00353 
00354 template <typename T>
00355 void edm::TrieFactory<T>::clear()
00356 {
00357   typename std::list<TrieNode<T>*>::const_iterator it;
00358   for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00359     delete[] *it;
00360   _allocatedNodes.clear();
00361   _nbUsedInLastNodes = 0;
00362 }
00363 
00364 
00365 template <typename T>
00366 edm::TrieNode<T>::TrieNode() :
00367   _brother(0), _brotherLabel(0), _firstSubNode(0), _firstSubNodeLabel(0)
00370 {
00371 }
00372 
00373 template <typename T>
00374 edm::TrieNode<T>::~TrieNode()
00375 {
00376   // do not delete _brother and _firstSubNode because they are
00377   // allocated by factory (TrieFactory) and factory will delete them
00378 }
00379 
00380 
00381 template <typename T>   
00382 edm::TrieNodeIter<T> 
00383 edm::TrieNode<T>::begin() const {
00384   return const_iterator(this);
00385 }
00386 template <typename T>   
00387 edm::TrieNodeIter<T> 
00388 edm::TrieNode<T>::end() const {
00389   return const_iterator(0);
00390 }
00391 
00392 template <typename T>
00393 void edm::TrieNode<T>::setValue(const T &val)
00394 {
00395   _value = val;
00396 }
00397 
00398 template <typename T>
00399 const T& edm::TrieNode<T>::value() const
00400 {
00401   return _value;
00402 }
00403 
00404 template <typename T>
00405 const edm::TrieNode<T>* edm::TrieNode<T>::brother() const
00406 {
00407   return _brother;
00408 }
00409 
00410 template <typename T>
00411 edm::TrieNode<T>* edm::TrieNode<T>::brother()
00412 {
00413   return _brother;
00414 }
00415 
00416 template <typename T>
00417 const edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr) const
00418 {
00419   const TrieNode<T> *brother = _brother;
00420   return _sequentialSearch(brother, _brotherLabel, chr);
00421 }
00422 
00423 template <typename T>
00424 edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr)
00425 {
00426   return _sequentialSearch(_brother, _brotherLabel, chr);
00427 }
00428 
00429 template <typename T>
00430 void edm::TrieNode<T>::_addBrother(unsigned char chr, TrieNode<T> *brother)
00431 {
00432   if (!_brother || _brotherLabel > chr)
00433     {
00434       brother->_setBrother(_brother, _brotherLabel);
00435       _brother = brother;
00436       _brotherLabel = chr;
00437     }
00438   else
00439     _brother->_addBrother(chr, brother);
00440 }
00441 
00442 template <typename T>
00443 unsigned char edm::TrieNode<T>::brotherLabel() const
00444 {
00445   return _brotherLabel;
00446 }
00447 
00448 template <typename T>
00449 const edm::TrieNode<T>* edm::TrieNode<T>::subNode() const
00450 {
00451   return _firstSubNode;
00452 }
00453 
00454 template <typename T>
00455 edm::TrieNode<T>* edm::TrieNode<T>::subNode()
00456 {
00457   return _firstSubNode;
00458 }
00459 
00460 template <typename T>
00461 unsigned char edm::TrieNode<T>::subNodeLabel() const
00462 {
00463   return _firstSubNodeLabel;
00464 }
00465 
00466 template <typename T>
00467 const edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const
00468 {
00469   const TrieNode<T> *first = _firstSubNode;
00470   return _sequentialSearch(first, _firstSubNodeLabel, chr);
00471 }
00472 
00473 template <typename T>
00474 edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr)
00475 {
00476   return _sequentialSearch(_firstSubNode, _firstSubNodeLabel, chr);
00477 }
00478 
00479 template <typename T>
00480 void edm::TrieNode<T>::addSubNode(unsigned char chr, TrieNode<T> *node)
00481 {
00482   if (!_firstSubNode || _firstSubNodeLabel > chr)
00483     {
00484       node->_setBrother(_firstSubNode, _firstSubNodeLabel);
00485       _firstSubNode = node;
00486       _firstSubNodeLabel = chr;
00487     }
00488   else
00489     _firstSubNode->_addBrother(chr, node);
00490 }
00491 
00492 template <typename T>
00493 template <typename Node>
00494 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const
00495 {
00496   if (first && label <= val)
00497     {
00498       if (label == val)
00499         return first;
00500       return first->_getBrother(val);
00501     }
00502   return 0x0;
00503 }
00504 
00505 template <typename T>
00506 void edm::TrieNode<T>::_setBrother(TrieNode<T> *brother, unsigned char brotherLabel)
00507 {
00508   _brother = brother;
00509   _brotherLabel = brotherLabel;
00510 }
00511 
00512 template <typename T>
00513 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const
00514 {
00515   unsigned int i;
00516   for (i = 0; i < offset; ++i)
00517     os << " ";
00518   if (label)
00519     os << "label[" << label << "] ";
00520   os << "value[" << _value << "]" << std::endl;
00521   if (_firstSubNode)
00522     _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
00523   if (_brother)
00524     _brother->display(os, offset, _brotherLabel);
00525 }
00526 
00527 template <typename T>
00528 void edm::TrieNode<T>::clear()
00529 {
00530   _brother = 0x0;
00531   _brotherLabel = 0;
00532   _firstSubNode = 0x0;
00533   _firstSubNodeLabel = 0;
00534 }
00535 
00536 
00537 #include <vector>
00538 #include <algorithm>
00539 #include <string>
00540 #include <cassert>
00541 
00542 #include "FWCore/Utilities/interface/EDMException.h"
00543 
00544 
00545 namespace edm {
00546   namespace detailsTrie  {
00547     inline void errorInsert(std::string const & key) {
00548       throw edm::Exception(edm::errors::InvalidReference)
00549         << "Trie::insert called with a key already in collection;\n"
00550         << "key value: " << key;
00551     }
00552   }
00553 }
00554 
00555 template <typename T>
00556 edm::Trie<T>::Trie(const T &empty) :
00557   _empty(empty), _factory(0x0), _initialNode(0x0)
00558 {
00559   // initialize nodes by paquets of 10000
00560   _factory = new TrieFactory<T>(10000);
00561   _initialNode = _factory->newNode(_empty);
00562 }
00563 
00564 template <typename T>
00565 edm::Trie<T>::~Trie()
00566 {
00567   delete _factory;
00568 }
00569 
00570 template <typename T>
00571 void edm::Trie<T>::setEntry(std::string const & str, const T &value)
00572 {
00573   setEntry(str.c_str(),str.size(),value);
00574 }
00575 template <typename T>
00576 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value)
00577 {
00578   TrieNode<T>   *node = _addEntry(str, strLen);
00579   node->setValue(value);
00580 }
00581 
00582 template <typename T>
00583 edm::TrieNode<T>* edm::Trie<T>::_addEntry(const char *str, unsigned strLen)
00584 {
00585   unsigned      pos = 0;
00586   bool          found = true;
00587   TrieNode<T>   *node = _initialNode, *previous = 0x0;
00588 
00589   // Look for the part of the word which is in Trie
00590   while (found && pos < strLen)
00591     {
00592       found = false;
00593       previous = node;
00594       node = node->subNodeByLabel(str[pos]);
00595       if (node)
00596         {
00597           found = true;
00598           ++pos;
00599         }
00600     }
00601 
00602   // Add part of the word which is not in Trie
00603   if (!node || pos != strLen)
00604     {
00605       node = previous;
00606       for (unsigned i = pos; i < strLen; ++i)
00607         {
00608           TrieNode<T> *newNode = _factory->newNode(_empty);
00609           node->addSubNode(str[pos], newNode);
00610           node = newNode;
00611           ++pos;
00612         }
00613     }
00614   assert(node != 0x0);
00615   return node;
00616 }
00617 
00618 
00619 template <typename T>
00620 void edm::Trie<T>::insert(std::string const & str, const T &value)
00621 {
00622   insert(str.c_str(),str.size(),value);
00623 }
00624 
00625 template <typename T>
00626 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value)
00627 {
00628   TrieNode<T>   *node = _addEntry(str, strLen);
00629   
00630   // Set the value on the last node
00631   if (node && node->value() != _empty)
00632     detailsTrie::errorInsert(std::string(str,strLen));
00633   node->setValue(value);
00634 }
00635 
00636 template <typename T>
00637 const T& edm::Trie<T>::find(std::string const & str) const {
00638   return find(str.c_str(),str.size());
00639 }
00640 
00641 template <typename T>
00642 const T& edm::Trie<T>::find(const char *str, unsigned strLen) const
00643 {
00644   unsigned              pos = 0;
00645   bool                  found = true;
00646   const TrieNode<T>     *node = _initialNode;
00647   
00648   while (found && pos < strLen)
00649     {
00650       found = false;
00651       node = node->subNodeByLabel(str[pos]);
00652       if (node)
00653         {
00654           found = true;
00655           ++pos;
00656         }
00657     }
00658   if (node && pos == strLen) // The word is complet in the automaton
00659     return node->value();
00660   return _empty;
00661 }
00662 
00663 template <typename T>
00664 edm::TrieNode<T> const * 
00665 edm::Trie<T>::node(std::string const & str) const {
00666     return node(str.c_str(),str.size());
00667   }
00668 
00669 template <typename T>
00670 edm::TrieNode<T> const * 
00671 edm::Trie<T>::node(const char *str, unsigned strLen) const {
00672   unsigned              pos = 0;
00673   bool                  found = true;
00674   const TrieNode<T>     *node = _initialNode;
00675         
00676   while (found && pos < strLen)
00677     {
00678       found = false;
00679       node = node->subNodeByLabel(str[pos]);
00680       if (node)
00681         {
00682           found = true;
00683           ++pos;
00684         }
00685     }
00686   return node;
00687 }
00688 
00689 
00690 template <typename T>
00691 const edm::TrieNode<T>* edm::Trie<T>::initialNode() const
00692 {
00693   return _initialNode;
00694 }
00695 
00696 template <typename T>
00697 void edm::Trie<T>::clear()
00698 {
00699   _factory->clear();
00700   _initialNode = _factory->newNode(_empty);
00701 }
00702 
00703 template <typename T>
00704 void edm::Trie<T>::display(std::ostream &os)
00705 {
00706   if (_initialNode)
00707     _initialNode->display(os, 0, 0);
00708 }
00709 
00710 #endif   //  DataFormats_Common_Trie_h

Generated on Tue Jun 9 17:30:30 2009 for CMSSW by  doxygen 1.5.4