00001 #ifndef DataFormats_Common_Trie_h
00002 #define DataFormats_Common_Trie_h
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <list>
00028 #include <string>
00029
00030 namespace edm
00031 {
00032
00033 template <typename T>
00034 class TrieNode;
00035
00040 template <typename T>
00041 class TrieFactory
00042 {
00043 public:
00044 TrieFactory(unsigned paquetSize);
00045 ~TrieFactory();
00046
00047 private:
00049 TrieFactory();
00051 TrieFactory(const TrieFactory &e);
00053 TrieFactory& operator=(const TrieFactory &e);
00054
00055 public:
00056 TrieNode<T>* newNode(const T &value);
00057 void clear();
00058
00059 private:
00060 unsigned _paquetSize;
00061 std::list<TrieNode<T>*> _allocatedNodes;
00062 TrieNode<T> *_lastNodes;
00063 unsigned _nbUsedInLastNodes;
00064 };
00065 }
00066
00067
00068 namespace edm
00069 {
00070 template<typename T>
00071 class TrieNodeIter;
00072
00073
00079 template <typename T>
00080 class TrieNode
00081 {
00082 public:
00083
00084 typedef TrieNodeIter<T> const_iterator;
00085
00086 TrieNode();
00087 ~TrieNode();
00088
00089 private:
00091 TrieNode(const TrieNode &e);
00093 TrieNode& operator=(const TrieNode &e);
00094
00095 public:
00097 void setValue(const T &val);
00099 const T& value() const;
00100
00102 const TrieNode<T>* brother() const;
00103 TrieNode<T>* brother();
00105 unsigned char brotherLabel() const;
00106
00107
00109 TrieNodeIter<T> begin() const;
00111 TrieNodeIter<T> end() const;
00112
00113
00114 const TrieNode<T>* subNode() const;
00115 TrieNode<T>* subNode();
00116 unsigned char subNodeLabel() const;
00117
00118
00119 const TrieNode<T>* subNodeByLabel(unsigned char chr) const;
00120 TrieNode<T>* subNodeByLabel(unsigned char chr);
00121
00122
00123 void addSubNode(unsigned char chr, TrieNode<T> *node);
00124
00126 void display(std::ostream &os, unsigned offset, unsigned char label) const;
00127
00129 void clear();
00130
00131 private:
00132 template <typename Node>
00133 Node _sequentialSearch(Node first, unsigned char label,
00134 unsigned char val) const;
00136 void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
00138 void _addBrother(unsigned char chr, TrieNode<T> *brother);
00143 const TrieNode<T>* _getBrother(unsigned char chr) const;
00148 TrieNode<T>* _getBrother(unsigned char chr);
00149
00150 private:
00152 TrieNode<T> *_brother;
00154 unsigned char _brotherLabel;
00156 TrieNode<T> *_firstSubNode;
00158 unsigned char _firstSubNodeLabel;
00160 T _value;
00161 };
00162 }
00163
00164 #include <ostream>
00165
00166 namespace edm
00167 {
00168
00169 template <typename T>
00170 class TrieFactory;
00171 template <typename T>
00172 class TrieNode;
00173
00178 template <typename T>
00179 class Trie
00180 {
00181 public:
00184 Trie(const T &empty);
00185 ~Trie();
00186
00187 private:
00189 Trie();
00191 Trie(const Trie &e);
00193 Trie& operator=(const Trie &e);
00194
00195 public:
00198 void insert(std::string const & str, const T &value);
00199 void insert(const char *str, unsigned strLen, const T &value);
00202 void setEntry(std::string const & str, const T &value);
00203 void setEntry(const char *str, unsigned strLen, const T &value);
00205 const T& find(std::string const & str) const;
00206 const T& find(const char *str, unsigned strLen) const;
00208 const TrieNode<T>* node(std::string const & str) const;
00209 const TrieNode<T>* node(const char *str, unsigned strLen) const;
00211 const TrieNode<T>* initialNode() const;
00213 void display(std::ostream &os);
00215 void clear();
00216
00217 private:
00218 TrieNode<T>* _addEntry(const char *str, unsigned strLen);
00219
00220 private:
00222 T _empty;
00224 TrieFactory<T> *_factory;
00226 TrieNode<T> *_initialNode;
00227 };
00228 }
00229
00230
00231
00232 #include <boost/iterator/iterator_facade.hpp>
00233
00234 #include<string>
00235 #include<ostream>
00236
00237
00238
00239
00240 namespace edm{
00241
00242 template<typename T>
00243 class TrieNodeIter
00244 : public boost::iterator_facade<TrieNodeIter<T>,
00245 TrieNode<T> const,
00246 boost::forward_traversal_tag >
00247 {
00248
00249 public:
00250 typedef TrieNodeIter<T> self;
00251 typedef TrieNode<T> const node_base;
00252 TrieNodeIter()
00253 : m_node(0), m_label(0)
00254 {}
00255
00256 explicit TrieNodeIter(node_base* p)
00257 : m_node(p ? p->subNode() : 0),
00258 m_label(p ? p->subNodeLabel() : 0)
00259 {}
00260
00261 unsigned char label() const { return m_label;}
00262 private:
00263 friend class boost::iterator_core_access;
00264
00265 void increment() {
00266 m_label = m_node->brotherLabel();
00267 m_node = m_node->brother();
00268 }
00269
00270 bool equal(self const& other) const
00271 {
00272 return this->m_node == other.m_node;
00273 }
00274
00275 node_base& dereference() const { return *m_node; }
00276
00277 node_base* m_node;
00278 unsigned char m_label;
00279 };
00280
00281
00283 template<typename V, typename T>
00284 void walkTrie(V & v, TrieNode<T> const & n, std::string const & label="") {
00285 typedef TrieNode<T> const node_base;
00286 typedef TrieNodeIter<T> node_iterator;
00287 node_iterator e;
00288 for (node_iterator p(&n); p!=e; ++p) {
00289 v((*p).value(),label+(char)p.label());
00290 walkTrie(v,*p,label+(char)p.label());
00291 }
00292 }
00293
00294
00295
00297 template<typename V, typename T>
00298 bool iterateTrieLeaves(V & v, TrieNode<T> const & n, std::string const & label="") {
00299 typedef TrieNode<T> const node_base;
00300 typedef TrieNodeIter<T> node_iterator;
00301 node_iterator e;
00302 node_iterator p(&n);
00303 if (p==e) return true;
00304 for (; p!=e; ++p) {
00305 if (iterateTrieLeaves(v,*p,label+(char)p.label()) )
00306 v((*p).value(),label+(char)p.label());
00307 }
00308 return false;
00309 }
00310
00311
00312 }
00313
00314
00315
00316
00317
00318
00319
00320 template <typename T>
00321 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize) :
00322 _paquetSize(paquetSize), _lastNodes(0x0), _nbUsedInLastNodes(0)
00323 {
00324 _lastNodes = new TrieNode<T>[paquetSize];
00325 }
00326
00327 template <typename T>
00328 edm::TrieFactory<T>::~TrieFactory()
00329 {
00330 typename std::list<TrieNode<T>*>::const_iterator it;
00331
00332 for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00333 delete[] *it;
00334 if (_lastNodes)
00335 delete[] _lastNodes;
00336 }
00337
00338 template <typename T>
00339 edm::TrieNode<T>* edm::TrieFactory<T>::newNode(const T &value)
00340 {
00341 if (_nbUsedInLastNodes == _paquetSize)
00342 {
00343 _allocatedNodes.push_back(_lastNodes);
00344 _nbUsedInLastNodes = 0;
00345 _lastNodes = new TrieNode<T>[_paquetSize];
00346 }
00347 TrieNode<T> *res = &_lastNodes[_nbUsedInLastNodes];
00348 ++_nbUsedInLastNodes;
00349 res->setValue(value);
00350 res->clear();
00351 return res;
00352 }
00353
00354 template <typename T>
00355 void edm::TrieFactory<T>::clear()
00356 {
00357 typename std::list<TrieNode<T>*>::const_iterator it;
00358 for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
00359 delete[] *it;
00360 _allocatedNodes.clear();
00361 _nbUsedInLastNodes = 0;
00362 }
00363
00364
00365 template <typename T>
00366 edm::TrieNode<T>::TrieNode() :
00367 _brother(0), _brotherLabel(0), _firstSubNode(0), _firstSubNodeLabel(0)
00370 {
00371 }
00372
00373 template <typename T>
00374 edm::TrieNode<T>::~TrieNode()
00375 {
00376
00377
00378 }
00379
00380
00381 template <typename T>
00382 edm::TrieNodeIter<T>
00383 edm::TrieNode<T>::begin() const {
00384 return const_iterator(this);
00385 }
00386 template <typename T>
00387 edm::TrieNodeIter<T>
00388 edm::TrieNode<T>::end() const {
00389 return const_iterator(0);
00390 }
00391
00392 template <typename T>
00393 void edm::TrieNode<T>::setValue(const T &val)
00394 {
00395 _value = val;
00396 }
00397
00398 template <typename T>
00399 const T& edm::TrieNode<T>::value() const
00400 {
00401 return _value;
00402 }
00403
00404 template <typename T>
00405 const edm::TrieNode<T>* edm::TrieNode<T>::brother() const
00406 {
00407 return _brother;
00408 }
00409
00410 template <typename T>
00411 edm::TrieNode<T>* edm::TrieNode<T>::brother()
00412 {
00413 return _brother;
00414 }
00415
00416 template <typename T>
00417 const edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr) const
00418 {
00419 const TrieNode<T> *brother = _brother;
00420 return _sequentialSearch(brother, _brotherLabel, chr);
00421 }
00422
00423 template <typename T>
00424 edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr)
00425 {
00426 return _sequentialSearch(_brother, _brotherLabel, chr);
00427 }
00428
00429 template <typename T>
00430 void edm::TrieNode<T>::_addBrother(unsigned char chr, TrieNode<T> *brother)
00431 {
00432 if (!_brother || _brotherLabel > chr)
00433 {
00434 brother->_setBrother(_brother, _brotherLabel);
00435 _brother = brother;
00436 _brotherLabel = chr;
00437 }
00438 else
00439 _brother->_addBrother(chr, brother);
00440 }
00441
00442 template <typename T>
00443 unsigned char edm::TrieNode<T>::brotherLabel() const
00444 {
00445 return _brotherLabel;
00446 }
00447
00448 template <typename T>
00449 const edm::TrieNode<T>* edm::TrieNode<T>::subNode() const
00450 {
00451 return _firstSubNode;
00452 }
00453
00454 template <typename T>
00455 edm::TrieNode<T>* edm::TrieNode<T>::subNode()
00456 {
00457 return _firstSubNode;
00458 }
00459
00460 template <typename T>
00461 unsigned char edm::TrieNode<T>::subNodeLabel() const
00462 {
00463 return _firstSubNodeLabel;
00464 }
00465
00466 template <typename T>
00467 const edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const
00468 {
00469 const TrieNode<T> *first = _firstSubNode;
00470 return _sequentialSearch(first, _firstSubNodeLabel, chr);
00471 }
00472
00473 template <typename T>
00474 edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr)
00475 {
00476 return _sequentialSearch(_firstSubNode, _firstSubNodeLabel, chr);
00477 }
00478
00479 template <typename T>
00480 void edm::TrieNode<T>::addSubNode(unsigned char chr, TrieNode<T> *node)
00481 {
00482 if (!_firstSubNode || _firstSubNodeLabel > chr)
00483 {
00484 node->_setBrother(_firstSubNode, _firstSubNodeLabel);
00485 _firstSubNode = node;
00486 _firstSubNodeLabel = chr;
00487 }
00488 else
00489 _firstSubNode->_addBrother(chr, node);
00490 }
00491
00492 template <typename T>
00493 template <typename Node>
00494 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const
00495 {
00496 if (first && label <= val)
00497 {
00498 if (label == val)
00499 return first;
00500 return first->_getBrother(val);
00501 }
00502 return 0x0;
00503 }
00504
00505 template <typename T>
00506 void edm::TrieNode<T>::_setBrother(TrieNode<T> *brother, unsigned char brotherLabel)
00507 {
00508 _brother = brother;
00509 _brotherLabel = brotherLabel;
00510 }
00511
00512 template <typename T>
00513 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const
00514 {
00515 unsigned int i;
00516 for (i = 0; i < offset; ++i)
00517 os << " ";
00518 if (label)
00519 os << "label[" << label << "] ";
00520 os << "value[" << _value << "]" << std::endl;
00521 if (_firstSubNode)
00522 _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
00523 if (_brother)
00524 _brother->display(os, offset, _brotherLabel);
00525 }
00526
00527 template <typename T>
00528 void edm::TrieNode<T>::clear()
00529 {
00530 _brother = 0x0;
00531 _brotherLabel = 0;
00532 _firstSubNode = 0x0;
00533 _firstSubNodeLabel = 0;
00534 }
00535
00536
00537 #include <vector>
00538 #include <algorithm>
00539 #include <string>
00540 #include <cassert>
00541
00542 #include "FWCore/Utilities/interface/EDMException.h"
00543
00544
00545 namespace edm {
00546 namespace detailsTrie {
00547 inline void errorInsert(std::string const & key) {
00548 throw edm::Exception(edm::errors::InvalidReference)
00549 << "Trie::insert called with a key already in collection;\n"
00550 << "key value: " << key;
00551 }
00552 }
00553 }
00554
00555 template <typename T>
00556 edm::Trie<T>::Trie(const T &empty) :
00557 _empty(empty), _factory(0x0), _initialNode(0x0)
00558 {
00559
00560 _factory = new TrieFactory<T>(10000);
00561 _initialNode = _factory->newNode(_empty);
00562 }
00563
00564 template <typename T>
00565 edm::Trie<T>::~Trie()
00566 {
00567 delete _factory;
00568 }
00569
00570 template <typename T>
00571 void edm::Trie<T>::setEntry(std::string const & str, const T &value)
00572 {
00573 setEntry(str.c_str(),str.size(),value);
00574 }
00575 template <typename T>
00576 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value)
00577 {
00578 TrieNode<T> *node = _addEntry(str, strLen);
00579 node->setValue(value);
00580 }
00581
00582 template <typename T>
00583 edm::TrieNode<T>* edm::Trie<T>::_addEntry(const char *str, unsigned strLen)
00584 {
00585 unsigned pos = 0;
00586 bool found = true;
00587 TrieNode<T> *node = _initialNode, *previous = 0x0;
00588
00589
00590 while (found && pos < strLen)
00591 {
00592 found = false;
00593 previous = node;
00594 node = node->subNodeByLabel(str[pos]);
00595 if (node)
00596 {
00597 found = true;
00598 ++pos;
00599 }
00600 }
00601
00602
00603 if (!node || pos != strLen)
00604 {
00605 node = previous;
00606 for (unsigned i = pos; i < strLen; ++i)
00607 {
00608 TrieNode<T> *newNode = _factory->newNode(_empty);
00609 node->addSubNode(str[pos], newNode);
00610 node = newNode;
00611 ++pos;
00612 }
00613 }
00614 assert(node != 0x0);
00615 return node;
00616 }
00617
00618
00619 template <typename T>
00620 void edm::Trie<T>::insert(std::string const & str, const T &value)
00621 {
00622 insert(str.c_str(),str.size(),value);
00623 }
00624
00625 template <typename T>
00626 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value)
00627 {
00628 TrieNode<T> *node = _addEntry(str, strLen);
00629
00630
00631 if (node && node->value() != _empty)
00632 detailsTrie::errorInsert(std::string(str,strLen));
00633 node->setValue(value);
00634 }
00635
00636 template <typename T>
00637 const T& edm::Trie<T>::find(std::string const & str) const {
00638 return find(str.c_str(),str.size());
00639 }
00640
00641 template <typename T>
00642 const T& edm::Trie<T>::find(const char *str, unsigned strLen) const
00643 {
00644 unsigned pos = 0;
00645 bool found = true;
00646 const TrieNode<T> *node = _initialNode;
00647
00648 while (found && pos < strLen)
00649 {
00650 found = false;
00651 node = node->subNodeByLabel(str[pos]);
00652 if (node)
00653 {
00654 found = true;
00655 ++pos;
00656 }
00657 }
00658 if (node && pos == strLen)
00659 return node->value();
00660 return _empty;
00661 }
00662
00663 template <typename T>
00664 edm::TrieNode<T> const *
00665 edm::Trie<T>::node(std::string const & str) const {
00666 return node(str.c_str(),str.size());
00667 }
00668
00669 template <typename T>
00670 edm::TrieNode<T> const *
00671 edm::Trie<T>::node(const char *str, unsigned strLen) const {
00672 unsigned pos = 0;
00673 bool found = true;
00674 const TrieNode<T> *node = _initialNode;
00675
00676 while (found && pos < strLen)
00677 {
00678 found = false;
00679 node = node->subNodeByLabel(str[pos]);
00680 if (node)
00681 {
00682 found = true;
00683 ++pos;
00684 }
00685 }
00686 return node;
00687 }
00688
00689
00690 template <typename T>
00691 const edm::TrieNode<T>* edm::Trie<T>::initialNode() const
00692 {
00693 return _initialNode;
00694 }
00695
00696 template <typename T>
00697 void edm::Trie<T>::clear()
00698 {
00699 _factory->clear();
00700 _initialNode = _factory->newNode(_empty);
00701 }
00702
00703 template <typename T>
00704 void edm::Trie<T>::display(std::ostream &os)
00705 {
00706 if (_initialNode)
00707 _initialNode->display(os, 0, 0);
00708 }
00709
00710 #endif // DataFormats_Common_Trie_h