CMS 3D CMS Logo

Trie.h
Go to the documentation of this file.
1 #ifndef DataFormats_Common_Trie_h
2 #define DataFormats_Common_Trie_h
3 /*
4 **
5 **
6 ** Copyright (C) 2006 Julien Lemoine
7 ** This program is free software; you can redistribute it and/or modify
8 ** it under the terms of the GNU Lesser General Public License as published by
9 ** the Free Software Foundation; either version 2 of the License, or
10 ** (at your option) any later version.
11 **
12 ** This program is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ** GNU Lesser General Public License for more details.
16 **
17 ** You should have received a copy of the GNU Lesser General Public License
18 ** along with this program; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 **
21 **
22 ** modified by Vincenzo Innocente on 15/08/2007
23 **
24 */
25 
26 
27 #include <list>
28 #include <string>
29 
30 namespace edm
31 {
32  // fwd declaration
33  template <typename T>
34  class TrieNode;
35 
40  template <typename T>
42  {
43  public:
44  TrieFactory(unsigned paquetSize);
45  ~TrieFactory();
46 
47  private:
49  TrieFactory();
51  TrieFactory(const TrieFactory &e);
54 
55  public:
56  TrieNode<T>* newNode(const T &value);
57  void clear();
58 
59  private:
60  unsigned _paquetSize;
61  std::list<TrieNode<T>*> _allocatedNodes;
64  };
65 }
66 
67 
68 namespace edm
69 {
70  template<typename T>
71  class TrieNodeIter;
72 
73 
79  template <typename T>
80  class TrieNode
81  {
82  public:
83 
85 
86  TrieNode();
87  ~TrieNode();
88 
89  private:
91  TrieNode(const TrieNode &e);
93  TrieNode& operator=(const TrieNode &e);
94 
95  public:
97  void setValue(const T &val);
99  const T& value() const;
100 
102  const TrieNode<T>* brother() const;
103  TrieNode<T>* brother();
105  unsigned char brotherLabel() const;
106 
107 
109  TrieNodeIter<T> begin() const;
111  TrieNodeIter<T> end() const;
112 
113  // get first sub Node
114  const TrieNode<T>* subNode() const;
115  TrieNode<T>* subNode();
116  unsigned char subNodeLabel() const;
117 
118  // Looking for a sub node
119  const TrieNode<T>* subNodeByLabel(unsigned char chr) const;
120  TrieNode<T>* subNodeByLabel(unsigned char chr);
121 
122  // add an edge
123  void addSubNode(unsigned char chr, TrieNode<T> *node);
124 
126  void display(std::ostream &os, unsigned offset, unsigned char label) const;
127 
129  void clear();
130 
131  private:
132  template <typename Node>
133  Node _sequentialSearch(Node first, unsigned char label,
134  unsigned char val) const;
136  void _setBrother(TrieNode<T> *brother, unsigned char brotherLabel);
138  void _addBrother(unsigned char chr, TrieNode<T> *brother);
143  const TrieNode<T>* _getBrother(unsigned char chr) const;
148  TrieNode<T>* _getBrother(unsigned char chr);
149 
150  private:
154  unsigned char _brotherLabel;
158  unsigned char _firstSubNodeLabel;
161  };
162 }
163 
164 #include <ostream>
165 
166 namespace edm
167 {
168  //fwd declaration
169  template <typename T>
170  class TrieFactory;
171  template <typename T>
172  class TrieNode;
173 
178  template <typename T>
179  class Trie
180  {
181  public:
184  Trie(const T &empty);
185  ~Trie();
186 
187  private:
189  Trie();
191  Trie(const Trie &e);
193  Trie& operator=(const Trie &e);
194 
195  public:
198  void insert(std::string const & str, const T &value);
199  void insert(const char *str, unsigned strLen, const T &value);
202  void setEntry(std::string const & str, const T &value);
203  void setEntry(const char *str, unsigned strLen, const T &value);
205  const T& find(std::string const & str) const;
206  const T& find(const char *str, unsigned strLen) const;
208  const TrieNode<T>* node(std::string const & str) const;
209  const TrieNode<T>* node(const char *str, unsigned strLen) const;
211  const TrieNode<T>* initialNode() const;
213  void display(std::ostream &os);
215  void clear();
216 
217  private:
218  TrieNode<T>* _addEntry(const char *str, unsigned strLen);
219 
220  private:
227  };
228 }
229 
230 
231 
232 #include <boost/iterator/iterator_facade.hpp>
233 
234 #include<string>
235 #include<ostream>
236 
237 
238 // iterators and visitors
239 
240 namespace edm{
241 
242  template<typename T>
243  class TrieNodeIter
244  : public boost::iterator_facade<TrieNodeIter<T>,
245  TrieNode<T> const,
246  boost::forward_traversal_tag >
247  {
248 
249  public:
250  typedef TrieNodeIter<T> self;
251  typedef TrieNode<T> const node_base;
253  : m_node(0), m_label(0)
254  {}
255 
256  explicit TrieNodeIter(node_base* p)
257  : m_node(p ? p->subNode() : 0),
258  m_label(p ? p->subNodeLabel() : 0)
259  {}
260 
261  unsigned char label() const { return m_label;}
262  private:
263  friend class boost::iterator_core_access;
264 
265  void increment() {
266  m_label = m_node->brotherLabel();
267  m_node = m_node->brother();
268  }
269 
270  bool equal(self const& other) const
271  {
272  return this->m_node == other.m_node;
273  }
274 
275  node_base& dereference() const { return *m_node; }
276 
277  node_base* m_node;
278  unsigned char m_label;
279  };
280 
281 
283  template<typename V, typename T>
284  void walkTrie(V & v, TrieNode<T> const & n, std::string const & label="") {
285  typedef TrieNodeIter<T> node_iterator;
286  node_iterator e;
287  for (node_iterator p(&n); p!=e; ++p) {
288  v((*p).value(),label+(char)p.label());
289  walkTrie(v,*p,label+(char)p.label());
290  }
291  }
292 
293 
294 
296  template<typename V, typename T>
297  bool iterateTrieLeaves(V & v, TrieNode<T> const & n, std::string const & label="") {
298  typedef TrieNodeIter<T> node_iterator;
299  node_iterator e;
300  node_iterator p(&n);
301  if (p==e) return true;
302  for (; p!=e; ++p) {
303  if (iterateTrieLeaves(v,*p,label+(char)p.label()) )
304  v((*p).value(),label+(char)p.label());
305  }
306  return false;
307  }
308 
309 
310 }
311 
312 //
313 //----------------------------------------------------------------
314 //
315 // implementations
316 
317 
318 template <typename T>
319 edm::TrieFactory<T>::TrieFactory(unsigned paquetSize) :
320  _paquetSize(paquetSize), _lastNodes(0x0), _nbUsedInLastNodes(0)
321 {
322  _lastNodes = new TrieNode<T>[paquetSize];
323 }
324 
325 template <typename T>
327 {
328  typename std::list<TrieNode<T>*>::const_iterator it;
329 
330  for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
331  delete[] *it;
332  if (_lastNodes)
333  delete[] _lastNodes;
334 }
335 
336 template <typename T>
338 {
340  {
341  _allocatedNodes.push_back(_lastNodes);
342  _nbUsedInLastNodes = 0;
344  }
347  res->setValue(value);
348  res->clear();
349  return res;
350 }
351 
352 template <typename T>
354 {
355  typename std::list<TrieNode<T>*>::const_iterator it;
356  for (it = _allocatedNodes.begin(); it != _allocatedNodes.end(); ++it)
357  delete[] *it;
358  _allocatedNodes.clear();
359  _nbUsedInLastNodes = 0;
360 }
361 
362 
363 template <typename T>
365  _brother(0), _brotherLabel(0), _firstSubNode(0), _firstSubNodeLabel(0),_value()
368 {
369 }
370 
371 template <typename T>
373 {
374  // do not delete _brother and _firstSubNode because they are
375  // allocated by factory (TrieFactory) and factory will delete them
376 }
377 
378 
379 template <typename T>
382  return const_iterator(this);
383 }
384 template <typename T>
387  return const_iterator(0);
388 }
389 
390 template <typename T>
392 {
393  _value = val;
394 }
395 
396 template <typename T>
398 {
399  return _value;
400 }
401 
402 template <typename T>
404 {
405  return _brother;
406 }
407 
408 template <typename T>
410 {
411  return _brother;
412 }
413 
414 template <typename T>
415 const edm::TrieNode<T>* edm::TrieNode<T>::_getBrother(unsigned char chr) const
416 {
417  const TrieNode<T> *brother = _brother;
418  return _sequentialSearch(brother, _brotherLabel, chr);
419 }
420 
421 template <typename T>
423 {
425 }
426 
427 template <typename T>
429 {
430  if (!_brother || _brotherLabel > chr)
431  {
433  _brother = brother;
434  _brotherLabel = chr;
435  }
436  else
437  _brother->_addBrother(chr, brother);
438 }
439 
440 template <typename T>
441 unsigned char edm::TrieNode<T>::brotherLabel() const
442 {
443  return _brotherLabel;
444 }
445 
446 template <typename T>
448 {
449  return _firstSubNode;
450 }
451 
452 template <typename T>
454 {
455  return _firstSubNode;
456 }
457 
458 template <typename T>
459 unsigned char edm::TrieNode<T>::subNodeLabel() const
460 {
461  return _firstSubNodeLabel;
462 }
463 
464 template <typename T>
465 const edm::TrieNode<T>* edm::TrieNode<T>::subNodeByLabel(unsigned char chr) const
466 {
468  return _sequentialSearch(first, _firstSubNodeLabel, chr);
469 }
470 
471 template <typename T>
473 {
475 }
476 
477 template <typename T>
478 void edm::TrieNode<T>::addSubNode(unsigned char chr, TrieNode<T> *node)
479 {
480  if (!_firstSubNode || _firstSubNodeLabel > chr)
481  {
483  _firstSubNode = node;
484  _firstSubNodeLabel = chr;
485  }
486  else
487  _firstSubNode->_addBrother(chr, node);
488 }
489 
490 template <typename T>
491 template <typename Node>
492 inline Node edm::TrieNode<T>::_sequentialSearch(Node first, unsigned char label, unsigned char val) const
493 {
494  if (first && label <= val)
495  {
496  if (label == val)
497  return first;
498  return first->_getBrother(val);
499  }
500  return 0x0;
501 }
502 
503 template <typename T>
505 {
506  _brother = brother;
508 }
509 
510 template <typename T>
511 void edm::TrieNode<T>::display(std::ostream &os, unsigned offset, unsigned char label) const
512 {
513  unsigned int i;
514  for (i = 0; i < offset; ++i)
515  os << " ";
516  if (label)
517  os << "label[" << label << "] ";
518  os << "value[" << _value << "]" << std::endl;
519  if (_firstSubNode)
520  _firstSubNode->display(os, offset + 2, _firstSubNodeLabel);
521  if (_brother)
522  _brother->display(os, offset, _brotherLabel);
523 }
524 
525 template <typename T>
527 {
528  _brother = 0x0;
529  _brotherLabel = 0;
530  _firstSubNode = 0x0;
531  _firstSubNodeLabel = 0;
532 }
533 
534 
535 #include <vector>
536 #include <algorithm>
537 #include <string>
538 #include <cassert>
539 
541 
542 
543 namespace edm {
544  namespace detailsTrie {
545  inline void errorInsert(std::string const & key) {
547  "Trie::insert called with a key already in collection;\n"
548  "key value: ",
549  key.c_str());
550  }
551  }
552 }
553 
554 template <typename T>
556  _empty(empty), _factory(0x0), _initialNode(0x0)
557 {
558  // initialize nodes by paquets of 10000
559  _factory = new TrieFactory<T>(10000);
560  _initialNode = _factory->newNode(_empty);
561 }
562 
563 template <typename T>
565 {
566  delete _factory;
567 }
568 
569 template <typename T>
571 {
572  setEntry(str.c_str(),str.size(),value);
573 }
574 template <typename T>
575 void edm::Trie<T>::setEntry(const char *str, unsigned strLen, const T &value)
576 {
577  TrieNode<T> *node = _addEntry(str, strLen);
578  node->setValue(value);
579 }
580 
581 template <typename T>
582 edm::TrieNode<T>* edm::Trie<T>::_addEntry(const char *str, unsigned strLen)
583 {
584  unsigned pos = 0;
585  bool found = true;
586  TrieNode<T> *node = _initialNode, *previous = 0x0;
587 
588  // Look for the part of the word which is in Trie
589  while (found && pos < strLen)
590  {
591  found = false;
592  previous = node;
593  node = node->subNodeByLabel(str[pos]);
594  if (node)
595  {
596  found = true;
597  ++pos;
598  }
599  }
600 
601  // Add part of the word which is not in Trie
602  if (!node || pos != strLen)
603  {
604  node = previous;
605  for (unsigned i = pos; i < strLen; ++i)
606  {
607  TrieNode<T> *newNode = _factory->newNode(_empty);
608  node->addSubNode(str[pos], newNode);
609  node = newNode;
610  ++pos;
611  }
612  }
613  assert(node != 0x0);
614  return node;
615 }
616 
617 
618 template <typename T>
620 {
621  insert(str.c_str(),str.size(),value);
622 }
623 
624 template <typename T>
625 void edm::Trie<T>::insert(const char *str, unsigned strLen, const T &value)
626 {
627  TrieNode<T> *node = _addEntry(str, strLen);
628 
629  // Set the value on the last node
630  if (node && node->value() != _empty)
632  node->setValue(value);
633 }
634 
635 template <typename T>
636 const T& edm::Trie<T>::find(std::string const & str) const {
637  return find(str.c_str(),str.size());
638 }
639 
640 template <typename T>
641 const T& edm::Trie<T>::find(const char *str, unsigned strLen) const
642 {
643  unsigned pos = 0;
644  bool found = true;
645  const TrieNode<T> *node = _initialNode;
646 
647  while (found && pos < strLen)
648  {
649  found = false;
650  node = node->subNodeByLabel(str[pos]);
651  if (node)
652  {
653  found = true;
654  ++pos;
655  }
656  }
657  if (node && pos == strLen) // The word is complet in the automaton
658  return node->value();
659  return _empty;
660 }
661 
662 template <typename T>
663 edm::TrieNode<T> const *
665  return node(str.c_str(),str.size());
666  }
667 
668 template <typename T>
669 edm::TrieNode<T> const *
670 edm::Trie<T>::node(const char *str, unsigned strLen) const {
671  unsigned pos = 0;
672  bool found = true;
673  const TrieNode<T> *node = _initialNode;
674 
675  while (found && pos < strLen)
676  {
677  found = false;
678  node = node->subNodeByLabel(str[pos]);
679  if (node)
680  {
681  found = true;
682  ++pos;
683  }
684  }
685  return node;
686 }
687 
688 
689 template <typename T>
691 {
692  return _initialNode;
693 }
694 
695 template <typename T>
697 {
698  _factory->clear();
699  _initialNode = _factory->newNode(_empty);
700 }
701 
702 template <typename T>
703 void edm::Trie<T>::display(std::ostream &os)
704 {
705  if (_initialNode)
706  _initialNode->display(os, 0, 0);
707 }
708 
709 #endif // DataFormats_Common_Trie_h
void clear()
clear content of TrieNode
Definition: Trie.h:526
unsigned char m_label
Definition: Trie.h:278
void display(std::ostream &os, unsigned offset, unsigned char label) const
display content of node in output stream
Definition: Trie.h:511
TrieFactory & operator=(const TrieFactory &e)
avoid affectation operator
~Trie()
Definition: Trie.h:564
this class represent the node of a trie, it contains a link to a sub node and a link to a brother (no...
Definition: Trie.h:34
void _setBrother(TrieNode< T > *brother, unsigned char brotherLabel)
set brother (used by sort)
Definition: Trie.h:504
void addSubNode(unsigned char chr, TrieNode< T > *node)
Definition: Trie.h:478
const TrieNode< T > * _getBrother(unsigned char chr) const
Definition: Trie.h:415
TrieNode< T > * _lastNodes
Definition: Trie.h:62
void setValue(const T &val)
set value associed to node
Definition: Trie.h:391
const TrieNode< T > * brother() const
get brother (return 0x0 this node has no brother)
Definition: Trie.h:403
const TrieNode< T > * subNode() const
Definition: Trie.h:447
unsigned char _firstSubNodeLabel
character to go to first subnode
Definition: Trie.h:158
void clear()
Definition: Trie.h:353
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:20
TrieNode< T > * _firstSubNode
pointer to first sub node
Definition: Trie.h:156
bool setValue(Container &, const reco::JetBaseRef &, const JetExtendedData &)
associate jet with value. Returns false and associate nothing if jet is already associated ...
TrieNodeIter< T > end() const
mark end of iteration (std conforming)
Definition: Trie.h:386
Definition: Electron.h:4
TrieNode< T > * newNode(const T &value)
Definition: Trie.h:337
static void throwThis(Code category, char const *message0="", char const *message1="", char const *message2="", char const *message3="", char const *message4="")
TrieNode()
Definition: Trie.h:364
bool iterateTrieLeaves(V &v, TrieNode< T > const &n, std::string const &label="")
visits only leaf nodes
Definition: Trie.h:297
void display(std::ostream &os)
display content of trie in output stream
Definition: Trie.h:703
void insert(std::string const &str, const T &value)
Definition: Trie.h:619
~TrieNode()
Definition: Trie.h:372
TrieNodeIter< T > begin() const
initialize subnode iterator (std conforming)
Definition: Trie.h:381
void setEntry(std::string const &str, const T &value)
Definition: Trie.h:570
TrieNode< T > const node_base
Definition: Trie.h:251
const T & value() const
get value associed to node
Definition: Trie.h:397
Node _sequentialSearch(Node first, unsigned char label, unsigned char val) const
Definition: Trie.h:492
void increment()
Definition: Trie.h:265
const T & find(std::string const &str) const
get an entry in the Trie
Definition: Trie.h:636
TrieFactory()
avoid default constructor
void _addBrother(unsigned char chr, TrieNode< T > *brother)
add a new brother
Definition: Trie.h:428
const TrieNode< T > * initialNode() const
get initial TrieNode
Definition: Trie.h:690
T _empty
value returned when no match is found in trie
Definition: Trie.h:222
std::list< TrieNode< T > * > _allocatedNodes
Definition: Trie.h:61
#define end
Definition: vmac.h:37
Definition: value.py:1
bool insert(Storage &iStorage, ItemType *iItem, const IdTag &iIdTag)
Definition: HCMethods.h:49
TrieNodeIter< T > const_iterator
Definition: Trie.h:84
void errorInsert(std::string const &key)
Definition: Trie.h:545
void walkTrie(V &v, TrieNode< T > const &n, std::string const &label="")
visit each node of the trie
Definition: Trie.h:284
unsigned _nbUsedInLastNodes
Definition: Trie.h:63
void clear()
clear the content of trie
Definition: Trie.h:696
bool equal(self const &other) const
Definition: Trie.h:270
TrieNodeIter(node_base *p)
Definition: Trie.h:256
TrieFactory< T > * _factory
factory
Definition: Trie.h:224
Trie()
avoid default constructor
#define begin
Definition: vmac.h:30
HLT enums.
TrieNode< T > * _initialNode
first node of trie
Definition: Trie.h:226
unsigned char label() const
Definition: Trie.h:261
const TrieNode< T > * subNodeByLabel(unsigned char chr) const
Definition: Trie.h:465
T _value
value associed to this node
Definition: Trie.h:160
unsigned char brotherLabel() const
get brother label
Definition: Trie.h:441
node_base * m_node
Definition: Trie.h:277
unsigned _paquetSize
Definition: Trie.h:60
T first(std::pair< T, U > const &p)
long double T
unsigned char subNodeLabel() const
Definition: Trie.h:459
unsigned char _brotherLabel
character to go to brother (node with same father as this one)
Definition: Trie.h:154
const TrieNode< T > * node(std::string const &str) const
get node matching a string
Definition: Trie.h:664
TrieNode< T > * _addEntry(const char *str, unsigned strLen)
Definition: Trie.h:582
node_base & dereference() const
Definition: Trie.h:275
TrieNode< T > * _brother
pointer to brother (node with same father as this one)
Definition: Trie.h:152