CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Trie.h
Go to the documentation of this file.
1 #ifndef DataFormats_Common_Trie_h
2 #define DataFormats_Common_Trie_h
3 /*
4 **
5 **
6 ** Copyright (C) 2006 Julien Lemoine
7 ** This program is free software; you can redistribute it and/or modify
8 ** it under the terms of the GNU Lesser General Public License as published by
9 ** the Free Software Foundation; either version 2 of the License, or
10 ** (at your option) any later version.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU Lesser General Public License for more details.
16 **
17 ** You should have received a copy of the GNU Lesser General Public License
18 ** along with this program; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 **
21 **
22 ** modified by Vincenzo Innocente on 15/08/2007
23 **
24 */
25 
26 
27 #include <list>
28 #include <string>
29 
30 namespace edm
31 {
32  // fwd declaration
33  template <typename T>
34  class TrieNode;
35 
40  template <typename T>
42  {
43  public:
44  TrieFactory(unsigned paquetSize);
45  ~TrieFactory();
46 
47  private:
49  TrieFactory();
51  TrieFactory(const TrieFactory &e);
54 
55  public:
56  TrieNode<T>* newNode(const T &value);
57  void clear();
58 
59  private:
60  unsigned _paquetSize;
61  std::list<TrieNode<T>*> _allocatedNodes;
64  };
65 }
66 
67 
68 namespace edm
69 {
70  template<typename T>
71  class TrieNodeIter;
72 
73 
79  template <typename T>
80  class TrieNode
81  {
82  public:
83 
85 
86  TrieNode();
87  ~TrieNode();
88 
89  private:
91  TrieNode(const TrieNode &e);
93  TrieNode& operator=(const TrieNode &e);
94 
95  public:
97  void setValue(const T &val);
99  const T& value() const;
100 
102  const TrieNode<T>* brother() const;
103  TrieNode<T>* brother();
105  unsigned char brotherLabel() const;
106 
107 
109  TrieNodeIter<T> begin() const;
111  TrieNodeIter<T> end() const;
112 
113  // get first sub Node
114  const TrieNode<T>* subNode() const;
115  TrieNode<T>* subNode();
116  unsigned char subNodeLabel() const;
117 
118  // Looking for a sub node
119  const TrieNode<T>* subNodeByLabel(unsigned char chr) const;
120  TrieNode<T>* subNodeByLabel(unsigned char chr);
121 
122  // add an edge
123  void addSubNode(unsigned char chr, TrieNode<T> *node);
124 
126  void display(std::ostream &os, unsigned offset, unsigned char label) const;
127 
129  void clear();
130 
131  private:
132  template <typename Node>
133  Node _sequentialSearch(Node first, unsigned char label,
134  unsigned char val) const;
136  void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
138  void _addBrother(unsigned char chr, TrieNode<T> *brother);
143  const TrieNode<T>* _getBrother(unsigned char chr) const;
148  TrieNode<T>* _getBrother(unsigned char chr);
149 
150  private:
154  unsigned char _brotherLabel;
158  unsigned char _firstSubNodeLabel;
161  };
162 }
163 
164 #include <ostream>
165 
166 namespace edm
167 {
168  //fwd declaration
169  template <typename T>
170  class TrieFactory;
171  template <typename T>
172  class TrieNode;
173 
178  template <typename T>
179  class Trie
180  {
181  public:
184  Trie(const T &empty);
185  ~Trie();
186 
187  private:
189  Trie();
191  Trie(const Trie &e);
193  Trie& operator=(const Trie &e);
194 
195  public:
198  void insert(std::string const & str, const T &value);
199  void insert(const char *str, unsigned strLen, const T &value);
202  void setEntry(std::string const & str, const T &value);
203  void setEntry(const char *str, unsigned strLen, const T &value);
205  const T& find(std::string const & str) const;
206  const T& find(const char *str, unsigned strLen) const;
208  const TrieNode<T>* node(std::string const & str) const;
209  const TrieNode<T>* node(const char *str, unsigned strLen) const;
211  const TrieNode<T>* initialNode() const;
213  void display(std::ostream &os);
215  void clear();
216 
217  private:
218  TrieNode<T>* _addEntry(const char *str, unsigned strLen);
219 
220  private:
227  };
228 }
229 
230 
231 
232 #include <boost/iterator/iterator_facade.hpp>
233 
234 #include<string>
235 #include<ostream>
236 
237 
238 // iterators and visitors
239 
240 namespace edm{
241 
242  template<typename T>
243  class TrieNodeIter
244  : public boost::iterator_facade<TrieNodeIter<T>,
245  TrieNode<T> const,
246  boost::forward_traversal_tag >
247  {
248 
249  public:
250  typedef TrieNodeIter<T> self;
251  typedef TrieNode<T> const node_base;
253  : m_node(0), m_label(0)
254  {}
255 
257  : m_node(p ? p->subNode() : 0),
258  m_label(p ? p->subNodeLabel() : 0)
259  {}
260 
261  unsigned char label() const { return m_label;}
262  private:
264 
265  void increment() {
267  m_node = m_node->brother();
268  }
269 
270  bool equal(self const& other) const
271  {
272  return this->m_node == other.m_node;
273  }
274 
275  node_base& dereference() const { return *m_node; }
276 
278  unsigned char m_label;
279  };
280 
281 
283  template<typename V, typename T>
284  void walkTrie(V & v, TrieNode<T> const & n, std::string const & label="") {
285  typedef TrieNode<T> const node_base;
287  node_iterator e;
288  for (node_iterator p(&n); p!=e; ++p) {
289  v((*p).value(),label+(char)p.label());
290  walkTrie(v,*p,label+(char)p.label());
291  }
292  }
293 
294 
295 
297  template<typename V, typename T>
298  bool iterateTrieLeaves(V & v, TrieNode<T> const & n, std::string const & label="") {
299  typedef TrieNode<T> const node_base;
301  node_iterator e;
302  node_iterator p(&n);
303  if (p==e) return true;
304  for (; p!=e; ++p) {
305  if (iterateTrieLeaves(v,*p,label+(char)p.label()) )
306  v((*p).value(),label+(char)p.label());
307  }
308  return false;
309  }
310 
311 
312 }
313 
314 //
315 //----------------------------------------------------------------
316 //
317 // implementations
318 
319 
320 template <typename T>
321 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize) :
322  _paquetSize(paquetSize), _lastNodes(0x0), _nbUsedInLastNodes(0)
323 {
324  _lastNodes = new TrieNode<T>[paquetSize];
325 }
326 
327 template <typename T>
329 {
330  typename std::list<TrieNode<T>*>::const_iterator it;
331 
332  for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
333  delete[] *it;
334  if (_lastNodes)
335  delete[] _lastNodes;
336 }
337 
338 template <typename T>
340 {
341  if (_nbUsedInLastNodes == _paquetSize)
342  {
343  _allocatedNodes.push_back(_lastNodes);
344  _nbUsedInLastNodes = 0;
345  _lastNodes = new TrieNode<T>[_paquetSize];
346  }
347  TrieNode<T> *res = &_lastNodes[_nbUsedInLastNodes];
348  ++_nbUsedInLastNodes;
349  res->setValue(value);
350  res->clear();
351  return res;
352 }
353 
354 template <typename T>
356 {
357  typename std::list<TrieNode<T>*>::const_iterator it;
358  for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
359  delete[] *it;
360  _allocatedNodes.clear();
361  _nbUsedInLastNodes = 0;
362 }
363 
364 
365 template <typename T>
367  _brother(0), _brotherLabel(0), _firstSubNode(0), _firstSubNodeLabel(0),_value()
370 {
371 }
372 
373 template <typename T>
375 {
376  // do not delete _brother and _firstSubNode because they are
377  // allocated by factory (TrieFactory) and factory will delete them
378 }
379 
380 
381 template <typename T>
384  return const_iterator(this);
385 }
386 template <typename T>
389  return const_iterator(0);
390 }
391 
392 template <typename T>
394 {
395  _value = val;
396 }
397 
398 template <typename T>
400 {
401  return _value;
402 }
403 
404 template <typename T>
406 {
407  return _brother;
408 }
409 
410 template <typename T>
412 {
413  return _brother;
414 }
415 
416 template <typename T>
417 const edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr) const
418 {
419  const TrieNode<T> *brother = _brother;
420  return _sequentialSearch(brother, _brotherLabel, chr);
421 }
422 
423 template <typename T>
425 {
426  return _sequentialSearch(_brother, _brotherLabel, chr);
427 }
428 
429 template <typename T>
430 void edm::TrieNode<T>::_addBrother(unsigned char chr, TrieNode<T> *brother)
431 {
432  if (!_brother || _brotherLabel > chr)
433  {
434  brother->_setBrother(_brother, _brotherLabel);
435  _brother = brother;
436  _brotherLabel = chr;
437  }
438  else
439  _brother->_addBrother(chr, brother);
440 }
441 
442 template <typename T>
443 unsigned char edm::TrieNode<T>::brotherLabel() const
444 {
445  return _brotherLabel;
446 }
447 
448 template <typename T>
450 {
451  return _firstSubNode;
452 }
453 
454 template <typename T>
456 {
457  return _firstSubNode;
458 }
459 
460 template <typename T>
461 unsigned char edm::TrieNode<T>::subNodeLabel() const
462 {
463  return _firstSubNodeLabel;
464 }
465 
466 template <typename T>
467 const edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const
468 {
469  const TrieNode<T> *first = _firstSubNode;
470  return _sequentialSearch(first, _firstSubNodeLabel, chr);
471 }
472 
473 template <typename T>
475 {
476  return _sequentialSearch(_firstSubNode, _firstSubNodeLabel, chr);
477 }
478 
479 template <typename T>
481 {
482  if (!_firstSubNode || _firstSubNodeLabel > chr)
483  {
484  node->_setBrother(_firstSubNode, _firstSubNodeLabel);
485  _firstSubNode = node;
486  _firstSubNodeLabel = chr;
487  }
488  else
489  _firstSubNode->_addBrother(chr, node);
490 }
491 
492 template <typename T>
493 template <typename Node>
494 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const
495 {
496  if (first && label <= val)
497  {
498  if (label == val)
499  return first;
500  return first->_getBrother(val);
501  }
502  return 0x0;
503 }
504 
505 template <typename T>
506 void edm::TrieNode<T>::_setBrother(TrieNode<T> *brother, unsigned char brotherLabel)
507 {
508  _brother = brother;
509  _brotherLabel = brotherLabel;
510 }
511 
512 template <typename T>
513 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const
514 {
515  unsigned int i;
516  for (i = 0; i < offset; ++i)
517  os << " ";
518  if (label)
519  os << "label[" << label << "] ";
520  os << "value[" << _value << "]" << std::endl;
521  if (_firstSubNode)
522  _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
523  if (_brother)
524  _brother->display(os, offset, _brotherLabel);
525 }
526 
527 template <typename T>
529 {
530  _brother = 0x0;
531  _brotherLabel = 0;
532  _firstSubNode = 0x0;
533  _firstSubNodeLabel = 0;
534 }
535 
536 
537 #include <vector>
538 #include <algorithm>
539 #include <string>
540 #include <cassert>
541 
543 
544 
545 namespace edm {
546  namespace detailsTrie {
547  inline void errorInsert(std::string const & key) {
549  "Trie::insert called with a key already in collection;\n"
550  "key value: ",
551  key.c_str());
552  }
553  }
554 }
555 
556 template <typename T>
558  _empty(empty), _factory(0x0), _initialNode(0x0)
559 {
560  // initialize nodes by paquets of 10000
561  _factory = new TrieFactory<T>(10000);
562  _initialNode = _factory->newNode(_empty);
563 }
564 
565 template <typename T>
567 {
568  delete _factory;
569 }
570 
571 template <typename T>
572 void edm::Trie<T>::setEntry(std::string const & str, const T &value)
573 {
574  setEntry(str.c_str(),str.size(),value);
575 }
576 template <typename T>
577 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value)
578 {
579  TrieNode<T> *node = _addEntry(str, strLen);
580  node->setValue(value);
581 }
582 
583 template <typename T>
584 edm::TrieNode<T>* edm::Trie<T>::_addEntry(const char *str, unsigned strLen)
585 {
586  unsigned pos = 0;
587  bool found = true;
588  TrieNode<T> *node = _initialNode, *previous = 0x0;
589 
590  // Look for the part of the word which is in Trie
591  while (found && pos < strLen)
592  {
593  found = false;
594  previous = node;
595  node = node->subNodeByLabel(str[pos]);
596  if (node)
597  {
598  found = true;
599  ++pos;
600  }
601  }
602 
603  // Add part of the word which is not in Trie
604  if (!node || pos != strLen)
605  {
606  node = previous;
607  for (unsigned i = pos; i < strLen; ++i)
608  {
609  TrieNode<T> *newNode = _factory->newNode(_empty);
610  node->addSubNode(str[pos], newNode);
611  node = newNode;
612  ++pos;
613  }
614  }
615  assert(node != 0x0);
616  return node;
617 }
618 
619 
620 template <typename T>
621 void edm::Trie<T>::insert(std::string const & str, const T &value)
622 {
623  insert(str.c_str(),str.size(),value);
624 }
625 
626 template <typename T>
627 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value)
628 {
629  TrieNode<T> *node = _addEntry(str, strLen);
630 
631  // Set the value on the last node
632  if (node && node->value() != _empty)
633  detailsTrie::errorInsert(std::string(str,strLen));
634  node->setValue(value);
635 }
636 
637 template <typename T>
638 const T& edm::Trie<T>::find(std::string const & str) const {
639  return find(str.c_str(),str.size());
640 }
641 
642 template <typename T>
643 const T& edm::Trie<T>::find(const char *str, unsigned strLen) const
644 {
645  unsigned pos = 0;
646  bool found = true;
647  const TrieNode<T> *node = _initialNode;
648 
649  while (found && pos < strLen)
650  {
651  found = false;
652  node = node->subNodeByLabel(str[pos]);
653  if (node)
654  {
655  found = true;
656  ++pos;
657  }
658  }
659  if (node && pos == strLen) // The word is complet in the automaton
660  return node->value();
661  return _empty;
662 }
663 
664 template <typename T>
665 edm::TrieNode<T> const *
666 edm::Trie<T>::node(std::string const & str) const {
667  return node(str.c_str(),str.size());
668  }
669 
670 template <typename T>
671 edm::TrieNode<T> const *
672 edm::Trie<T>::node(const char *str, unsigned strLen) const {
673  unsigned pos = 0;
674  bool found = true;
675  const TrieNode<T> *node = _initialNode;
676 
677  while (found && pos < strLen)
678  {
679  found = false;
680  node = node->subNodeByLabel(str[pos]);
681  if (node)
682  {
683  found = true;
684  ++pos;
685  }
686  }
687  return node;
688 }
689 
690 
691 template <typename T>
693 {
694  return _initialNode;
695 }
696 
697 template <typename T>
699 {
700  _factory->clear();
701  _initialNode = _factory->newNode(_empty);
702 }
703 
704 template <typename T>
705 void edm::Trie<T>::display(std::ostream &os)
706 {
707  if (_initialNode)
708  _initialNode->display(os, 0, 0);
709 }
710 
711 #endif // DataFormats_Common_Trie_h
void clear()
clear content of TrieNode
Definition: Trie.h:528
unsigned char m_label
Definition: Trie.h:278
int i
Definition: DBlmapReader.cc:9
void display(std::ostream &os, unsigned offset, unsigned char label) const
display content of node in output stream
Definition: Trie.h:513
TrieFactory & operator=(const TrieFactory &e)
avoid affectation operator
~Trie()
Definition: Trie.h:566
edm::TrieNodeIter< PDet > node_iterator
this class represent the node of a trie, it contains a link to a sub node and a link to a brother (no...
Definition: Trie.h:34
void _setBrother(TrieNode< T > *brother, unsigned char brotherLabel)
set brother (used by sort)
Definition: Trie.h:506
void addSubNode(unsigned char chr, TrieNode< T > *node)
Definition: Trie.h:480
const TrieNode< T > * _getBrother(unsigned char chr) const
Definition: Trie.h:417
TrieNode< T > * _lastNodes
Definition: Trie.h:62
void setValue(const T &val)
set value associed to node
Definition: Trie.h:393
const TrieNode< T > * brother() const
get brother (return 0x0 this node has no brother)
Definition: Trie.h:405
const TrieNode< T > * subNode() const
Definition: Trie.h:449
unsigned char _firstSubNodeLabel
character to go to first subnode
Definition: Trie.h:158
void clear()
Definition: Trie.h:355
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:7
TrieNode< T > * _firstSubNode
pointer to first sub node
Definition: Trie.h:156
TrieNodeIter< T > end() const
mark end of iteration (std conforming)
Definition: Trie.h:388
tuple node
Definition: Node.py:50
TrieNode< T > * newNode(const T &value)
Definition: Trie.h:339
static void throwThis(Code category, char const *message0="", char const *message1="", char const *message2="", char const *message3="", char const *message4="")
TrieNode()
Definition: Trie.h:366
bool iterateTrieLeaves(V &v, TrieNode< T > const &n, std::string const &label="")
visits only leaf nodes
Definition: Trie.h:298
void display(std::ostream &os)
display content of trie in output stream
Definition: Trie.h:705
void insert(std::string const &str, const T &value)
Definition: Trie.h:621
~TrieNode()
Definition: Trie.h:374
TrieNodeIter< T > begin() const
initialize subnode iterator (std conforming)
Definition: Trie.h:383
void setEntry(std::string const &str, const T &value)
Definition: Trie.h:572
TrieNode< T > const node_base
Definition: Trie.h:251
const T & value() const
get value associed to node
Definition: Trie.h:399
Node _sequentialSearch(Node first, unsigned char label, unsigned char val) const
Definition: Trie.h:494
void increment()
Definition: Trie.h:265
const T & find(std::string const &str) const
get an entry in the Trie
Definition: Trie.h:638
TrieFactory()
avoid default constructor
void _addBrother(unsigned char chr, TrieNode< T > *brother)
add a new brother
Definition: Trie.h:430
const TrieNode< T > * initialNode() const
get initial TrieNode
Definition: Trie.h:692
T _empty
value returned when no match is found in trie
Definition: Trie.h:222
std::list< TrieNode< T > * > _allocatedNodes
Definition: Trie.h:61
unsigned int offset(bool)
friend class boost::iterator_core_access
Definition: Trie.h:263
TrieNodeIter< T > const_iterator
Definition: Trie.h:84
void errorInsert(std::string const &key)
Definition: Trie.h:547
void walkTrie(V &v, TrieNode< T > const &n, std::string const &label="")
visit each node of the trie
Definition: Trie.h:284
unsigned _nbUsedInLastNodes
Definition: Trie.h:63
void clear()
clear the content of trie
Definition: Trie.h:698
bool equal(self const &other) const
Definition: Trie.h:270
TrieNode & operator=(const TrieNode &e)
avoid affectation operator
TrieNodeIter(node_base *p)
Definition: Trie.h:256
TrieFactory< T > * _factory
factory
Definition: Trie.h:224
Trie & operator=(const Trie &e)
avoid affectation operator
Trie()
avoid default constructor
TrieNode< T > * _initialNode
first node of trie
Definition: Trie.h:226
list key
Definition: combine.py:13
unsigned char label() const
Definition: Trie.h:261
const TrieNode< T > * subNodeByLabel(unsigned char chr) const
Definition: Trie.h:467
T _value
value associed to this node
Definition: Trie.h:160
unsigned char brotherLabel() const
get brother label
Definition: Trie.h:443
edm::TrieNode< PDet > Node
node_base * m_node
Definition: Trie.h:277
unsigned _paquetSize
Definition: Trie.h:60
T first(std::pair< T, U > const &p)
long double T
unsigned char subNodeLabel() const
Definition: Trie.h:461
unsigned char _brotherLabel
character to go to brother (node with same father as this one)
Definition: Trie.h:154
bool insert(Storage &, ItemType *, const IdTag &)
const TrieNode< T > * node(std::string const &str) const
get node matching a string
Definition: Trie.h:666
TrieNode< T > * _addEntry(const char *str, unsigned strLen)
Definition: Trie.h:584
mathSSE::Vec4< T > v
node_base & dereference() const
Definition: Trie.h:275
TrieNode< T > * _brother
pointer to brother (node with same father as this one)
Definition: Trie.h:152