00001 #ifndef DataFormats_Common_Trie_h
00002 #define DataFormats_Common_Trie_h
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <list>
00028 #include <string>
00029
00030 namespace edm
00031 {
00032
00033 template <typename T>
00034 class TrieNode;
00035
00040 template <typename T>
00041 class TrieFactory
00042 {
00043 public:
00044 TrieFactory(unsigned paquetSize);
00045 ~TrieFactory();
00046
00047 private:
00049 TrieFactory();
00051 TrieFactory(const TrieFactory &e);
00053 TrieFactory& operator=(const TrieFactory &e);
00054
00055 public:
00056 TrieNode<T>* newNode(const T &value);
00057 void clear();
00058
00059 private:
00060 unsigned _paquetSize;
00061 std::list<TrieNode<T>*> _allocatedNodes;
00062 TrieNode<T> *_lastNodes;
00063 unsigned _nbUsedInLastNodes;
00064 };
00065 }
00066
00067
00068 namespace edm
00069 {
00070 template<typename T>
00071 class TrieNodeIter;
00072
00073
00079 template <typename T>
00080 class TrieNode
00081 {
00082 public:
00083
00084 typedef TrieNodeIter<T> const_iterator;
00085
00086 TrieNode();
00087 ~TrieNode();
00088
00089 private:
00091 TrieNode(const TrieNode &e);
00093 TrieNode& operator=(const TrieNode &e);
00094
00095 public:
00097 void setValue(const T &val);
00099 const T& value() const;
00100
00102 const TrieNode<T>* brother() const;
00103 TrieNode<T>* brother();
00105 unsigned char brotherLabel() const;
00106
00107
00109 TrieNodeIter<T> begin() const;
00111 TrieNodeIter<T> end() const;
00112
00113
00114 const TrieNode<T>* subNode() const;
00115 TrieNode<T>* subNode();
00116 unsigned char subNodeLabel() const;
00117
00118
00119 const TrieNode<T>* subNodeByLabel(unsigned char chr) const;
00120 TrieNode<T>* subNodeByLabel(unsigned char chr);
00121
00122
00123 void addSubNode(unsigned char chr, TrieNode<T> *node);
00124
00126 void display(std::ostream &os, unsigned offset, unsigned char label) const;
00127
00129 void clear();
00130
00131 private:
00132 template <typename Node>
00133 Node _sequentialSearch(Node first, unsigned char label,
00134 unsigned char val) const;
00136 void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
00138 void _addBrother(unsigned char chr, TrieNode<T> *brother);
00143 const TrieNode<T>* _getBrother(unsigned char chr) const;
00148 TrieNode<T>* _getBrother(unsigned char chr);
00149
00150 private:
00152 TrieNode<T> *_brother;
00154 unsigned char _brotherLabel;
00156 TrieNode<T> *_firstSubNode;
00158 unsigned char _firstSubNodeLabel;
00160 T _value;
00161 };
00162 }
00163
00164 #include <ostream>
00165
00166 namespace edm
00167 {
00168
00169 template <typename T>
00170 class TrieFactory;
00171 template <typename T>
00172 class TrieNode;
00173
00178 template <typename T>
00179 class Trie
00180 {
00181 public:
00184 Trie(const T &empty);
00185 ~Trie();
00186
00187 private:
00189 Trie();
00191 Trie(const Trie &e);
00193 Trie& operator=(const Trie &e);
00194
00195 public:
00198 void insert(std::string const & str, const T &value);
00199 void insert(const char *str, unsigned strLen, const T &value);
00202 void setEntry(std::string const & str, const T &value);
00203 void setEntry(const char *str, unsigned strLen, const T &value);
00205 const T& find(std::string const & str) const;
00206 const T& find(const char *str, unsigned strLen) const;
00208 const TrieNode<T>* node(std::string const & str) const;
00209 const TrieNode<T>* node(const char *str, unsigned strLen) const;
00211 const TrieNode<T>* initialNode() const;
00213 void display(std::ostream &os);
00215 void clear();
00216
00217 private:
00218 TrieNode<T>* _addEntry(const char *str, unsigned strLen);
00219
00220 private:
00222 T _empty;
00224 TrieFactory<T> *_factory;
00226 TrieNode<T> *_initialNode;
00227 };
00228 }
00229
00230
00231
00232 #include <boost/iterator/iterator_facade.hpp>
00233
00234 #include<string>
00235 #include<ostream>
00236
00237
00238
00239
00240 namespace edm{
00241
00242 template<typename T>
00243 class TrieNodeIter
00244 : public boost::iterator_facade<TrieNodeIter<T>,
00245 TrieNode<T> const,
00246 boost::forward_traversal_tag >
00247 {
00248
00249 public:
00250 typedef TrieNodeIter<T> self;
00251 typedef TrieNode<T> const node_base;
00252 TrieNodeIter()
00253 : m_node(0), m_label(0)
00254 {}
00255
00256 explicit TrieNodeIter(node_base* p)
00257 : m_node(p ? p->subNode() : 0),
00258 m_label(p ? p->subNodeLabel() : 0)
00259 {}
00260
00261 unsigned char label() const { return m_label;}
00262 private:
00263 friend class boost::iterator_core_access;
00264
00265 void increment() {
00266 m_label = m_node->brotherLabel();
00267 m_node = m_node->brother();
00268 }
00269
00270 bool equal(self const& other) const
00271 {
00272 return this->m_node == other.m_node;
00273 }
00274
00275 node_base& dereference() const { return *m_node; }
00276
00277 node_base* m_node;
00278 unsigned char m_label;
00279 };
00280
00281
00283 template<typename V, typename T>
00284 void walkTrie(V & v, TrieNode<T> const & n, std::string const & label="") {
00285 typedef TrieNode<T> const node_base;
00286 typedef TrieNodeIter<T> node_iterator;
00287 node_iterator e;
00288 for (node_iterator p(&n); p!=e; ++p) {
00289 v((*p).value(),label+(char)p.label());
00290 walkTrie(v,*p,label+(char)p.label());
00291 }
00292 }
00293
00294
00295
00297 template<typename V, typename T>
00298 bool iterateTrieLeaves(V & v, TrieNode<T> const & n, std::string const & label="") {
00299 typedef TrieNode<T> const node_base;
00300 typedef TrieNodeIter<T> node_iterator;
00301 node_iterator e;
00302 node_iterator p(&n);
00303 if (p==e) return true;
00304 for (; p!=e; ++p) {
00305 if (iterateTrieLeaves(v,*p,label+(char)p.label()) )
00306 v((*p).value(),label+(char)p.label());
00307 }
00308 return false;
00309 }
00310
00311
00312 }
00313
00314
00315
00316
00317
00318
00319
00320 template <typename T>
00321 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize) :
00322 _paquetSize(paquetSize), _lastNodes(0x0), _nbUsedInLastNodes(0)
00323 {
00324 _lastNodes = new TrieNode<T>[paquetSize];
00325 }
00326
00327 template <typename T>
00328 edm::TrieFactory<T>::~TrieFactory()
00329 {
00330 typename std::list<TrieNode<T>*>::const_iterator it;
00331
00332 for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00333 delete[] *it;
00334 if (_lastNodes)
00335 delete[] _lastNodes;
00336 }
00337
00338 template <typename T>
00339 edm::TrieNode<T>* edm::TrieFactory<T>::newNode(const T &value)
00340 {
00341 if (_nbUsedInLastNodes == _paquetSize)
00342 {
00343 _allocatedNodes.push_back(_lastNodes);
00344 _nbUsedInLastNodes = 0;
00345 _lastNodes = new TrieNode<T>[_paquetSize];
00346 }
00347 TrieNode<T> *res = &_lastNodes[_nbUsedInLastNodes];
00348 ++_nbUsedInLastNodes;
00349 res->setValue(value);
00350 res->clear();
00351 return res;
00352 }
00353
00354 template <typename T>
00355 void edm::TrieFactory<T>::clear()
00356 {
00357 typename std::list<TrieNode<T>*>::const_iterator it;
00358 for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00359 delete[] *it;
00360 _allocatedNodes.clear();
00361 _nbUsedInLastNodes = 0;
00362 }
00363
00364
00365 template <typename T>
00366 edm::TrieNode<T>::TrieNode() :
00367 _brother(0), _brotherLabel(0), _firstSubNode(0), _firstSubNodeLabel(0),_value()
00370 {
00371 }
00372
00373 template <typename T>
00374 edm::TrieNode<T>::~TrieNode()
00375 {
00376
00377
00378 }
00379
00380
00381 template <typename T>
00382 edm::TrieNodeIter<T>
00383 edm::TrieNode<T>::begin() const {
00384 return const_iterator(this);
00385 }
00386 template <typename T>
00387 edm::TrieNodeIter<T>
00388 edm::TrieNode<T>::end() const {
00389 return const_iterator(0);
00390 }
00391
00392 template <typename T>
00393 void edm::TrieNode<T>::setValue(const T &val)
00394 {
00395 _value = val;
00396 }
00397
00398 template <typename T>
00399 const T& edm::TrieNode<T>::value() const
00400 {
00401 return _value;
00402 }
00403
00404 template <typename T>
00405 const edm::TrieNode<T>* edm::TrieNode<T>::brother() const
00406 {
00407 return _brother;
00408 }
00409
00410 template <typename T>
00411 edm::TrieNode<T>* edm::TrieNode<T>::brother()
00412 {
00413 return _brother;
00414 }
00415
00416 template <typename T>
00417 const edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr) const
00418 {
00419 const TrieNode<T> *brother = _brother;
00420 return _sequentialSearch(brother, _brotherLabel, chr);
00421 }
00422
00423 template <typename T>
00424 edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr)
00425 {
00426 return _sequentialSearch(_brother, _brotherLabel, chr);
00427 }
00428
00429 template <typename T>
00430 void edm::TrieNode<T>::_addBrother(unsigned char chr, TrieNode<T> *brother)
00431 {
00432 if (!_brother || _brotherLabel > chr)
00433 {
00434 brother->_setBrother(_brother, _brotherLabel);
00435 _brother = brother;
00436 _brotherLabel = chr;
00437 }
00438 else
00439 _brother->_addBrother(chr, brother);
00440 }
00441
00442 template <typename T>
00443 unsigned char edm::TrieNode<T>::brotherLabel() const
00444 {
00445 return _brotherLabel;
00446 }
00447
00448 template <typename T>
00449 const edm::TrieNode<T>* edm::TrieNode<T>::subNode() const
00450 {
00451 return _firstSubNode;
00452 }
00453
00454 template <typename T>
00455 edm::TrieNode<T>* edm::TrieNode<T>::subNode()
00456 {
00457 return _firstSubNode;
00458 }
00459
00460 template <typename T>
00461 unsigned char edm::TrieNode<T>::subNodeLabel() const
00462 {
00463 return _firstSubNodeLabel;
00464 }
00465
00466 template <typename T>
00467 const edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const
00468 {
00469 const TrieNode<T> *first = _firstSubNode;
00470 return _sequentialSearch(first, _firstSubNodeLabel, chr);
00471 }
00472
00473 template <typename T>
00474 edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr)
00475 {
00476 return _sequentialSearch(_firstSubNode, _firstSubNodeLabel, chr);
00477 }
00478
00479 template <typename T>
00480 void edm::TrieNode<T>::addSubNode(unsigned char chr, TrieNode<T> *node)
00481 {
00482 if (!_firstSubNode || _firstSubNodeLabel > chr)
00483 {
00484 node->_setBrother(_firstSubNode, _firstSubNodeLabel);
00485 _firstSubNode = node;
00486 _firstSubNodeLabel = chr;
00487 }
00488 else
00489 _firstSubNode->_addBrother(chr, node);
00490 }
00491
00492 template <typename T>
00493 template <typename Node>
00494 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const
00495 {
00496 if (first && label <= val)
00497 {
00498 if (label == val)
00499 return first;
00500 return first->_getBrother(val);
00501 }
00502 return 0x0;
00503 }
00504
00505 template <typename T>
00506 void edm::TrieNode<T>::_setBrother(TrieNode<T> *brother, unsigned char brotherLabel)
00507 {
00508 _brother = brother;
00509 _brotherLabel = brotherLabel;
00510 }
00511
00512 template <typename T>
00513 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const
00514 {
00515 unsigned int i;
00516 for (i = 0; i < offset; ++i)
00517 os << " ";
00518 if (label)
00519 os << "label[" << label << "] ";
00520 os << "value[" << _value << "]" << std::endl;
00521 if (_firstSubNode)
00522 _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
00523 if (_brother)
00524 _brother->display(os, offset, _brotherLabel);
00525 }
00526
00527 template <typename T>
00528 void edm::TrieNode<T>::clear()
00529 {
00530 _brother = 0x0;
00531 _brotherLabel = 0;
00532 _firstSubNode = 0x0;
00533 _firstSubNodeLabel = 0;
00534 }
00535
00536
00537 #include <vector>
00538 #include <algorithm>
00539 #include <string>
00540 #include <cassert>
00541
00542 #include "FWCore/Utilities/interface/EDMException.h"
00543
00544
00545 namespace edm {
00546 namespace detailsTrie {
00547 inline void errorInsert(std::string const & key) {
00548 Exception::throwThis(errors::InvalidReference,
00549 "Trie::insert called with a key already in collection;\n"
00550 "key value: ",
00551 key.c_str());
00552 }
00553 }
00554 }
00555
00556 template <typename T>
00557 edm::Trie<T>::Trie(const T &empty) :
00558 _empty(empty), _factory(0x0), _initialNode(0x0)
00559 {
00560
00561 _factory = new TrieFactory<T>(10000);
00562 _initialNode = _factory->newNode(_empty);
00563 }
00564
00565 template <typename T>
00566 edm::Trie<T>::~Trie()
00567 {
00568 delete _factory;
00569 }
00570
00571 template <typename T>
00572 void edm::Trie<T>::setEntry(std::string const & str, const T &value)
00573 {
00574 setEntry(str.c_str(),str.size(),value);
00575 }
00576 template <typename T>
00577 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value)
00578 {
00579 TrieNode<T> *node = _addEntry(str, strLen);
00580 node->setValue(value);
00581 }
00582
00583 template <typename T>
00584 edm::TrieNode<T>* edm::Trie<T>::_addEntry(const char *str, unsigned strLen)
00585 {
00586 unsigned pos = 0;
00587 bool found = true;
00588 TrieNode<T> *node = _initialNode, *previous = 0x0;
00589
00590
00591 while (found && pos < strLen)
00592 {
00593 found = false;
00594 previous = node;
00595 node = node->subNodeByLabel(str[pos]);
00596 if (node)
00597 {
00598 found = true;
00599 ++pos;
00600 }
00601 }
00602
00603
00604 if (!node || pos != strLen)
00605 {
00606 node = previous;
00607 for (unsigned i = pos; i < strLen; ++i)
00608 {
00609 TrieNode<T> *newNode = _factory->newNode(_empty);
00610 node->addSubNode(str[pos], newNode);
00611 node = newNode;
00612 ++pos;
00613 }
00614 }
00615 assert(node != 0x0);
00616 return node;
00617 }
00618
00619
00620 template <typename T>
00621 void edm::Trie<T>::insert(std::string const & str, const T &value)
00622 {
00623 insert(str.c_str(),str.size(),value);
00624 }
00625
00626 template <typename T>
00627 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value)
00628 {
00629 TrieNode<T> *node = _addEntry(str, strLen);
00630
00631
00632 if (node && node->value() != _empty)
00633 detailsTrie::errorInsert(std::string(str,strLen));
00634 node->setValue(value);
00635 }
00636
00637 template <typename T>
00638 const T& edm::Trie<T>::find(std::string const & str) const {
00639 return find(str.c_str(),str.size());
00640 }
00641
00642 template <typename T>
00643 const T& edm::Trie<T>::find(const char *str, unsigned strLen) const
00644 {
00645 unsigned pos = 0;
00646 bool found = true;
00647 const TrieNode<T> *node = _initialNode;
00648
00649 while (found && pos < strLen)
00650 {
00651 found = false;
00652 node = node->subNodeByLabel(str[pos]);
00653 if (node)
00654 {
00655 found = true;
00656 ++pos;
00657 }
00658 }
00659 if (node && pos == strLen)
00660 return node->value();
00661 return _empty;
00662 }
00663
00664 template <typename T>
00665 edm::TrieNode<T> const *
00666 edm::Trie<T>::node(std::string const & str) const {
00667 return node(str.c_str(),str.size());
00668 }
00669
00670 template <typename T>
00671 edm::TrieNode<T> const *
00672 edm::Trie<T>::node(const char *str, unsigned strLen) const {
00673 unsigned pos = 0;
00674 bool found = true;
00675 const TrieNode<T> *node = _initialNode;
00676
00677 while (found && pos < strLen)
00678 {
00679 found = false;
00680 node = node->subNodeByLabel(str[pos]);
00681 if (node)
00682 {
00683 found = true;
00684 ++pos;
00685 }
00686 }
00687 return node;
00688 }
00689
00690
00691 template <typename T>
00692 const edm::TrieNode<T>* edm::Trie<T>::initialNode() const
00693 {
00694 return _initialNode;
00695 }
00696
00697 template <typename T>
00698 void edm::Trie<T>::clear()
00699 {
00700 _factory->clear();
00701 _initialNode = _factory->newNode(_empty);
00702 }
00703
00704 template <typename T>
00705 void edm::Trie<T>::display(std::ostream &os)
00706 {
00707 if (_initialNode)
00708 _initialNode->display(os, 0, 0);
00709 }
00710
00711 #endif // DataFormats_Common_Trie_h