CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_14/src/TrackingTools/TrajectoryCleaning/src/OtherHashMaps.h

Go to the documentation of this file.
00001 #ifndef TrackingTools_TrajectoryCleaning_src_OtherHashMaps
00002 #define TrackingTools_TrajectoryCleaning_src_OtherHashMaps
00003 
00004 #include <map>
00005 #include <vector>
00006 #include <memory>
00007 #include <list>
00008 #include <iterator>
00009 #include <boost/unordered_map.hpp>
00010 #include <boost/range.hpp>
00011 #include <boost/type_traits.hpp>
00012 #include <boost/mpl/and.hpp>
00013 #include <boost/mpl/logical.hpp>
00014 #include <boost/mpl/assert.hpp>
00015 #include <boost/mpl/if.hpp>
00016 
00017 #include <iostream>
00018 
00019 namespace cmsutil { 
00020 
00021 /*** The concept is the same of std::map<K, std::vector<V>>, but the implementation is much different (and the interface is not really compatible)
00022  *   It works only for K and V objects with trivial destructors (primitive types and bare pointers are fine)
00023  *   The main implementation difference w.r.t. unordered_map<K, std::vector<V>> is that this map is optimized to do very few memory allocations
00024  *   (in particular, clear does not redeme memory so if you just clear the map instead of deleting it you won't call malloc each time you fill it)
00025  *   The map doesn't rehash, so you should set a reasonable number of buckets (that you can change also when clearing the map)
00026  *   Note that too many buckets will make your map slow to clear (and possibly more prone to cache misses).
00027  *   Although it can take an allocator as template argument, it has been tested only with std::allocator.
00028  *   When used in the TrajectoryCleanerBySharedHits it works and it does improve the performance. Any other usecase was not checked at all.
00029  * */
00030 template<typename K, typename V, typename Hasher=boost::hash<K>, typename Equals=std::equal_to<K>, typename Alloc=std::allocator<V> >
00031 class SimpleAllocHashMultiMap {
00032     // taking Alloc definition from http://www.codeproject.com/KB/cpp/allocator.aspx
00033     BOOST_MPL_ASSERT( (boost::mpl::and_<boost::has_trivial_destructor<K>, boost::has_trivial_destructor<V> > ));
00034     public:
00035         typedef Hasher hasher;
00036         typedef SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc> map_type;
00037         typedef typename boost::mpl::if_<typename boost::is_pointer<V>, V, V const &>::type value_ref; // otherwise we get problems with 'const' if V is a pointer
00038 
00039         SimpleAllocHashMultiMap(size_t buckets, size_t keyRowSize, size_t valueRowSize, size_t maxRows=50) ;
00040 
00041         ~SimpleAllocHashMultiMap() ;
00042 
00043         void clear(size_t newBucketSize=0) { 
00044             if (newBucketSize != 0) {
00045                 if (newBucketSize > bucketCapacity_) {
00046                     ptrAlloc_.deallocate(buckets_, bucketCapacity_);
00047                     bucketCapacity_ = newBucketSize;
00048                     buckets_ = ptrAlloc_.allocate(bucketCapacity_);
00049                 }
00050                 bucketSize_ = newBucketSize;
00051             }
00052             memset(buckets_, 0, bucketSize_ * sizeof(KeyItem *));
00053             currentKeyRow_ = keyRows_.begin();
00054             nextKeyItem_ = keyRows_.front(); keyEndMarker_ = nextKeyItem_ + keyRowSize_;
00055             currentValueRow_ = valueRows_.begin();
00056             nextValueItem_ = valueRows_.front(); valueEndMarker_ = nextValueItem_ + valueRowSize_;
00057             if ((keyRows_.size() > maxRows_) || (valueRows_.size() > maxRows_)) freeRows();
00058         }
00059         void freeRows() ;
00060 
00061         bool empty() const { return nextKeyItem_ == keyRows_.front(); }
00062 
00063         void insert(K const &key, value_ref value );
00064 
00065         struct ValueItem {
00066             ValueItem(ValueItem *next1, value_ref val) : next(next1), value(val) {}
00067             ValueItem * next;
00068             V           value;
00069             typedef V value_type;
00070             const value_type & operator()() const { return value; }
00071         };
00072 
00073         struct KeyItem {
00074             KeyItem (KeyItem *next1, K const &key1, ValueItem *val1) : key(key1), next(next1), value(val1) {}
00075             K           key;
00076             KeyItem   * next;
00077             ValueItem * value;
00078             typedef KeyItem value_type;
00079             const value_type & operator()() const { return *this; }
00080         };
00081 
00082         template<typename Item>
00083         class item_iterator { 
00084             public:
00085                 typedef ::std::forward_iterator_tag iterator_category;
00086                 typedef const typename Item::value_type  value_type;
00087                 typedef const value_type & reference;
00088                 typedef const value_type * pointer;
00089                 typedef ptrdiff_t difference_type;
00090                 typedef item_iterator<Item> self_type;
00091 
00092                 item_iterator() : it_(0) {}
00093                 item_iterator(const Item *it) : it_(it) {}
00094                 const value_type & operator*()  const { return   (*it_)(); }
00095                 const value_type * operator->() const { return & (*it_)(); }
00096                 self_type & operator++() { 
00097                     if (it_ != 0) it_ = it_->next; 
00098                     return *this; 
00099                 }
00100                 bool operator==(const self_type &other) const { return it_ == other.it_; }
00101                 bool operator!=(const self_type &other) const { return it_ != other.it_; }
00102                 bool good() const { return (it_ != 0); }
00103             private:
00104                 const Item *it_;
00105         };
00106         typedef item_iterator<ValueItem> value_iterator;
00107 
00108         value_iterator values(K const &key) ;
00109 
00110     private:
00111         typedef typename Alloc::template rebind<KeyItem>::other   KeyItemAllocator;
00112         typedef typename Alloc::template rebind<KeyItem *>::other KeyItemPtrAllocator;
00113         typedef typename Alloc::template rebind<ValueItem>::other ValueItemAllocator;
00114 
00115         // --- buckets ---
00116         size_t bucketSize_, bucketCapacity_;
00117         KeyItem ** buckets_;
00118         // --- keys ---
00119         size_t keyRowSize_; 
00120         std::list<KeyItem *> keyRows_;
00121         typename std::list<KeyItem *>::iterator currentKeyRow_; // last row that is currently in use. nextItem_ and the last valid item are both on this row. it is never rows_.end()
00122         KeyItem   *nextKeyItem_, *keyEndMarker_;
00123         // --- values ---
00124         size_t valueRowSize_;
00125         std::list<ValueItem *> valueRows_;
00126         typename std::list<ValueItem *>::iterator currentValueRow_; // last row that is currently in use. nextItem_ and the last valid item are both on this row. it is never rows_.end()
00127         ValueItem   *nextValueItem_, *valueEndMarker_;
00128         // --- other ---
00129         size_t maxRows_;
00130         Hasher hasher_;
00131         Equals eq_; 
00132         KeyItemAllocator    keyAlloc_;
00133         ValueItemAllocator  valueAlloc_;
00134         KeyItemPtrAllocator ptrAlloc_;
00135 
00136         KeyItem   * push_back_(K const &key,   KeyItem   *next) ; 
00137         ValueItem * push_back_(value_ref value, ValueItem *next) ; 
00138         KeyItem   & find_or_insert_(K const &key) ;
00139     public:
00140         void dump() {
00141             std::cout << "Dumping HASH MULTIMAP" << std::endl;
00142             std::cout << "  Key items: " << (std::distance(keyRows_.begin(), currentKeyRow_) * keyRowSize_ + (nextKeyItem_ - *currentKeyRow_)) << std::endl;
00143             std::cout << "  Value items: " << (std::distance(valueRows_.begin(), currentValueRow_) * valueRowSize_ + (nextValueItem_ - *currentValueRow_)) << std::endl;
00144             size_t row = 0;
00145             std::cout << "  Buckets (size " << bucketSize_ << ", capacity " << bucketCapacity_ << ")";
00146             for (KeyItem **p = buckets_; p != buckets_ + bucketSize_; ++p, ++row) {
00147                 std::cout << "      ["<<row<<"] " << *p << std::endl;
00148             }
00149             row = 0;
00150             std::cout << "  Key Items " << std::endl;
00151             for (typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); it != last; ++it, ++row) {
00152                 KeyItem *lastI = *it + keyRowSize_;
00153                 std::cout << "   Key Row " << row <<  " (of size  " << keyRowSize_ << ")" << std::endl;
00154                 for (KeyItem *p = *it; p != lastI; ++p) {
00155                     std::cout << "      @ " << p << " [" << p->key << ", @" << p->value <<"], next = " << p->next << std::endl;
00156                     if ((it == currentKeyRow_) && (p == nextKeyItem_ - 1)) {
00157                         std::cout << "      ^^^ this was the last valid item." << std::endl;
00158                         last = 0; break;
00159                     }
00160                 }
00161                 if (lastI == 0) break;
00162             }
00163             row = 0;
00164             std::cout << "  Value Items " << std::endl;
00165             for (typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); it != last; ++it, ++row) {
00166                 ValueItem *lastI = *it + valueRowSize_;
00167                 std::cout << "   Value Row " << row <<  " (of size  " << valueRowSize_ << ")" << std::endl;
00168                 for (ValueItem *p = *it; p != lastI; ++p) {
00169                     std::cout << "      @ " << p << " [" << p->value <<"], next = " << p->next << std::endl;
00170                     if ((it == currentValueRow_) && (p == nextValueItem_ - 1)) {
00171                         std::cout << "      ^^^ this was the last valid item." << std::endl;
00172                         last = 0; break;
00173                     }
00174                 }
00175                 if (lastI == 0) break;
00176             }
00177             std::cout << "  End of dump" << std::endl; 
00178             
00179         }
00180 };
00181 
00182 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00183 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::SimpleAllocHashMultiMap(size_t buckets, size_t keyRowSize, size_t valueRowSize, size_t maxRows) :
00184     bucketSize_(buckets), bucketCapacity_(bucketSize_),
00185     keyRowSize_(keyRowSize),
00186     valueRowSize_(valueRowSize),
00187     maxRows_(maxRows)
00188 {
00189     buckets_ = ptrAlloc_.allocate(bucketCapacity_);
00190     keyRows_.push_back(keyAlloc_.allocate(keyRowSize_));
00191     valueRows_.push_back(valueAlloc_.allocate(valueRowSize_));
00192     clear();
00193 }
00194 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00195 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::~SimpleAllocHashMultiMap() 
00196 {
00197     for (typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); it != last; ++it) {
00198         keyAlloc_.deallocate(*it, keyRowSize_);
00199     }
00200     for (typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); it != last; ++it) {
00201         valueAlloc_.deallocate(*it, valueRowSize_);
00202     }
00203     ptrAlloc_.deallocate(buckets_, bucketCapacity_);
00204 }
00205 
00206 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00207 void 
00208 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::freeRows() {
00209    if (keyRows_.size() > maxRows_) {
00210         //std::cerr << "Freeing key rows, current size is " << keyRows_.size() << std::endl;
00211         typename std::list<KeyItem *>::iterator it = keyRows_.begin(), last = keyRows_.end(); 
00212         for (std::advance(it, maxRows_); it != last; ++it) {
00213             keyAlloc_.deallocate(*it, keyRowSize_);
00214         }
00215         keyRows_.resize(maxRows_);
00216    } 
00217    if (valueRows_.size() > maxRows_) {
00218         //std::cerr << "Freeing value rows, current size is " << valueRows_.size() << std::endl;
00219         typename std::list<ValueItem *>::iterator it = valueRows_.begin(), last = valueRows_.end(); 
00220         for (std::advance(it, maxRows_); it != last; ++it) {
00221             valueAlloc_.deallocate(*it, valueRowSize_);
00222         }
00223         valueRows_.resize(maxRows_);
00224    } 
00225 }
00226 
00227 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00228 typename SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::KeyItem * 
00229 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::push_back_(K const &key, KeyItem *next) { 
00230     if (nextKeyItem_ == keyEndMarker_) {
00231         ++currentKeyRow_; 
00232         if (currentKeyRow_ == keyRows_.end()) {
00233             keyRows_.push_back(keyAlloc_.allocate(keyRowSize_));
00234             currentKeyRow_ = keyRows_.end(); --currentKeyRow_; // end - 1 doesn't work!
00235         }
00236         nextKeyItem_  = *currentKeyRow_;
00237         keyEndMarker_ = nextKeyItem_ + keyRowSize_; 
00238     }
00239     keyAlloc_.construct(nextKeyItem_, KeyItem(next, key, 0));
00240     nextKeyItem_++;
00241     return (nextKeyItem_-1);
00242 }
00243 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00244 typename SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::ValueItem * 
00245 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::push_back_(value_ref value, ValueItem *next) { 
00246     if (nextValueItem_ == valueEndMarker_) {
00247         ++currentValueRow_; 
00248         if (currentValueRow_ == valueRows_.end()) {
00249             valueRows_.push_back(valueAlloc_.allocate(valueRowSize_));
00250             currentValueRow_ = valueRows_.end(); --currentValueRow_; // end - 1 doesn't work!
00251         }
00252         nextValueItem_  = *currentValueRow_;
00253         valueEndMarker_ = nextValueItem_ + valueRowSize_; 
00254     }
00255     valueAlloc_.construct(nextValueItem_, ValueItem(next, value));
00256     nextValueItem_++;
00257     return (nextValueItem_-1);
00258 }
00259 
00260 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00261 typename SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::KeyItem &
00262 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::find_or_insert_(K const &key) {
00263     //std::cout << "Find or insert for key " << key << std::endl;
00264     size_t hash = hasher_(key);
00265     KeyItem * & buck = buckets_[hash % bucketSize_];
00266     KeyItem * curr = buck;
00267     while (curr) {
00268         if (eq_(curr->key, key)) {
00269             //std::cout << "  Key " << key << " was found." << std::endl;
00270             return *curr;
00271         } 
00272         curr = curr->next;
00273     } 
00274     buck = push_back_(key, buck);
00275     return *buck;
00276 }
00277 
00278 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00279 void SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::insert(K const &key, value_ref value) {
00280     //std::cout << "Pushing back value " << value << " for key " << key << std::endl;
00281     KeyItem &k = find_or_insert_(key);
00282     //std::cout << "Key " << (k.value ? "exists" : " is new") << std::endl;
00283     k.value = push_back_(value, k.value);
00284 }
00285 
00286 template<typename K, typename V, typename Hasher, typename Equals, typename Alloc>
00287 typename SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::value_iterator
00288 SimpleAllocHashMultiMap<K,V,Hasher,Equals,Alloc>::values(K const &key) {
00289     //std::cout << "Gettinv values for key " << key << std::endl;
00290     size_t hash = hasher_(key);
00291     for (KeyItem * curr = buckets_[hash % bucketSize_]; curr; curr = curr->next) {
00292         if (eq_(curr->key, key)) return value_iterator(curr->value);
00293     }
00294     return value_iterator();
00295 }
00296 
00297 
00298 /*** Very very simple map implementation
00299  *   It's just a std::vector<pair<key,value>>, and the operator[] does a linear search to find the key (it's O(N) time, both if the key exists and if it doesn't)
00300  *   Anyway, if your map is very small and if you clear it often, it performs better than more complex variants
00301  *   Semantics is as std::map, except that only very few methods are implemented.
00302  * */
00303 template<typename K, typename V>
00304 class UnsortedDumbVectorMap {
00305     public:
00306         typedef std::pair<K,V> value_type;
00307         typedef typename std::vector<value_type>::const_iterator const_iterator;
00308         typedef typename std::vector<value_type>::iterator       iterator; // but please don't mutate the keys
00309 
00310         void clear() { data_.clear(); }
00311         bool empty() const { return data_.empty(); }
00312         const_iterator begin() const { return data_.begin(); } 
00313         const_iterator end()   const { return data_.end(); } 
00314         iterator begin()  { return data_.begin(); } 
00315         iterator end()    { return data_.end(); } 
00316         
00317         V & operator[](const K &k) {
00318             for (typename std::vector<value_type>::iterator it = data_.begin(), ed = data_.end(); it != ed; ++it) {
00319                 if (it->first == k) return it->second;
00320             }
00321             data_.push_back(value_type(k,V()));
00322             return data_.back().second;
00323         }
00324 
00325 
00326         UnsortedDumbVectorMap() {}
00327         
00328     private:
00329         std::vector<value_type> data_;
00330 
00331 };
00332 
00333 } // namespace 
00334 
00335 #endif