Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

HashTable.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // HashTable
00015 //
00016 // 9 January 2004 - tds
00017 //
00018 
00019 #ifndef LEMUR_HASHTABLE_HPP
00020 #define LEMUR_HASHTABLE_HPP
00021 
00022 #include <utility>
00023 
00024 //
00025 // GenericHash<_Key>
00026 //
00027 
00028 template<class _Key>
00029 class GenericHash { 
00030 public:
00031   int operator() ( const _Key& k ) const {
00032     return (int) k;
00033   }
00034 };
00035 
00036 //
00037 // GenericComparator<_Key>
00038 //
00039 
00040 template<class _Key>
00041 class GenericComparator {
00042 public:
00043   int operator() ( const _Key& one, const _Key& two ) const {
00044     return (int) (one - two);
00045   }
00046 };
00047 
00048 template<>
00049 class GenericHash<const char*> {
00050 public:
00051   int operator() ( const char* const& kp ) const {
00052     int hash = 0;
00053     const char* k = kp;
00054     for( ; *k; k++ ){
00055       hash *= 7;
00056       hash += *k;
00057     }
00058     return hash;
00059   }
00060 };
00061 
00062 template<>
00063 class GenericComparator<const char*> {
00064 public:
00065   int operator () ( const char* const& one, const char* const& two ) const {
00066     return strcmp( one, two );
00067   }
00068 };
00069 
00070 //
00071 // HashBucket<_Key, _Value>
00072 //
00073 
00074 template<class _Key, class _Value>
00075 struct HashBucket {
00076   _Key key;
00077   _Value value;
00078   HashBucket<_Key, _Value>* next;
00079 
00080   HashBucket() : next( (HashBucket<_Key, _Value>*) ~0 ) {};
00081   HashBucket( const _Key& k, HashBucket<_Key, _Value>* n ) : key(k), next(n) {};
00082   HashBucket( const _Key& k, const _Value& v, HashBucket<_Key, _Value>* n ) : key(k), value(v), next(n) {};
00083   
00084   ~HashBucket() {
00085     next = (HashBucket<_Key, _Value>*) ~0;
00086   }
00087 
00088   bool empty() {
00089     return next == (HashBucket<_Key, _Value>*) ~0;
00090   }
00091 
00092   void setEmpty() {
00093     next = (HashBucket<_Key, _Value>*) ~0;
00094   }
00095 };
00096 
00097 //
00098 // HashTableIterator<_Key, _Value, _Comparator>
00099 //
00100 
00101 template<class _Key, class _Value, class _Comparator>
00102 class HashTableIterator {
00103 private:
00104   typedef HashBucket<_Key, _Value> bucket_type;
00105   bucket_type* _table;
00106   bucket_type* _currentEntry;
00107   size_t _currentBucket;
00108   size_t _totalBuckets;
00109   std::pair<_Key*, _Value*> _pair;
00110 
00111   void next() {
00112     // already at end
00113     if( _currentBucket == -1 )
00114       return;
00115 
00116     // in a chain with more entries left
00117     if( _currentEntry && _currentEntry->next != 0 ) {
00118       _currentEntry = _currentEntry->next;
00119       return;
00120     }
00121 
00122     if( _currentEntry )
00123       _currentBucket++;
00124 
00125     for( ; _currentBucket < _totalBuckets; _currentBucket++ ) {
00126       _currentEntry = &_table[_currentBucket];
00127 
00128       if( ! _currentEntry->empty() ) {
00129         return;
00130       }
00131     }
00132 
00133     // none left
00134     _currentEntry = 0;
00135     _currentBucket = -1;
00136   }
00137 
00138 public:
00139   HashTableIterator() {
00140     _currentBucket = -1;
00141     _currentEntry = 0;
00142   }
00143 
00144   HashTableIterator( bucket_type* table, size_t totalBuckets ) {
00145     _table = table;
00146     _totalBuckets = totalBuckets;
00147 
00148     _currentBucket = 0;
00149     _currentEntry = 0;
00150     next();
00151   }
00152 
00153   bool operator == ( const HashTableIterator& other ) {
00154     if( other._currentEntry == _currentEntry &&
00155         other._currentBucket == _currentBucket )
00156         return true;
00157     return false;
00158   }
00159 
00160   bool operator != ( const HashTableIterator& other ) {
00161     if( other._currentEntry == _currentEntry &&
00162         other._currentBucket == _currentBucket )
00163         return false;
00164     return true;
00165   }
00166 
00167   void operator++ ( int ) {
00168     next();
00169   }
00170 
00171   std::pair<_Key*, _Value*>& operator* () {
00172     _pair.first = &_currentEntry->key;
00173     _pair.second = &_currentEntry->value;
00174     return _pair;
00175   }
00176 
00177   std::pair<_Key*, _Value*>* operator-> () {
00178     return &(*(*this));
00179   }
00180 };
00181 
00182 template<class _Key, class _Value, class _HashFunction = GenericHash<_Key>, class _Comparator = GenericComparator<_Key> >
00183 class HashTable {
00184 public:
00185   friend class HashTableIterator<_Key, _Value, _Comparator>;
00186 
00187   typedef HashBucket<_Key, _Value> bucket_type;
00188   typedef _Key key_type;
00189   typedef _Value value_type;
00190   typedef _HashFunction hash_type;
00191   typedef _Comparator compare_type;
00192   typedef class HashTableIterator<_Key, _Value, _Comparator> iterator;
00193 
00194 private:
00195   bucket_type* _table;
00196   hash_type _hash;
00197   compare_type _compare;
00198   size_t _buckets;
00199   iterator _end;
00200   size_t _count;
00201 
00202   bucket_type* _parentBucket( const _Key& k ) const {
00203     size_t index = _hash(k) % _buckets;
00204     return &_table[index];
00205   }
00206 
00207 public:
00208   HashTable( size_t size = 16384 ) {
00209     _buckets = size / sizeof(bucket_type);
00210     _table = reinterpret_cast<bucket_type*>(new char[_buckets * sizeof(bucket_type)]);
00211     _count = 0;
00212     
00213     for( size_t i=0; i<_buckets; i++ ) {
00214       _table[i].setEmpty();
00215     }
00216   }
00217 
00218   ~HashTable() {
00219     clear();
00220     delete[] reinterpret_cast<char*>(_table);
00221   }
00222 
00223   _Value* find( const _Key& k ) const {
00224     bucket_type* bucket = _parentBucket(k);
00225 
00226     if( bucket->empty() ) {
00227       return 0;
00228     } else {
00229       while(1) {
00230         if( _compare( k, bucket->key ) == 0 ) {
00231             return &bucket->value;
00232         }
00233         
00234         if( bucket->next == 0 )
00235           return 0;
00236         else
00237           bucket = bucket->next;
00238       }
00239     }
00240   }
00241 
00242   _Value* insert( const _Key& k ) {
00243     bucket_type* bucket = _parentBucket(k);
00244     _count++;
00245 
00246     if( bucket->empty() ) {
00247       new(bucket) bucket_type( k, 0 );
00248       return &bucket->value;
00249     } else {
00250       bucket_type* newBucket = new bucket_type( k, bucket->next );
00251       bucket->next = newBucket;
00252       return &newBucket->value;
00253     }
00254   }
00255 
00256   _Value* insert( const _Key& k, const _Value& v ) {
00257     bucket_type* bucket = _parentBucket(k);
00258     _count++;
00259 
00260     if( bucket->empty() ) {
00261       new(bucket) bucket_type( k, v, 0 );
00262       return &bucket->value;
00263     } else {
00264       bucket_type* newBucket = new bucket_type( k, v, bucket->next );
00265       bucket->next = newBucket;
00266       return &newBucket->value;
00267     }
00268   }
00269 
00270   void remove( const _Key& k ) {
00271     bucket_type* bucket = _parentBucket(k);
00272 
00273     if( !bucket->empty() ) {
00274       if( _compare( k, bucket->key ) == 0 ) {
00275         if( bucket->next ) {
00276           bucket_type* nextBucket = bucket->next;
00277           bucket->~bucket_type();
00278           new(bucket) bucket_type( nextBucket->key, nextBucket->value, nextBucket->next );
00279           delete nextBucket;
00280         } else {
00281           bucket->~bucket_type();
00282         }
00283         _count--;
00284       } else {
00285         bucket_type* parent = bucket;
00286         bucket = parent->next;
00287 
00288         while( bucket ) {
00289           if( _compare( k, bucket->key ) == 0 ) {
00290             _count--;
00291             parent->next = bucket->next;
00292             delete bucket;
00293             break;
00294           }
00295           parent = bucket;
00296           bucket = bucket->next;
00297         }
00298       }
00299     }
00300   }
00301 
00302   void clear() {
00303     for( size_t i=0; i<_buckets; i++ ) {
00304       if( !_table[i].empty() ) {
00305         bucket_type* current = _table[i].next;
00306         bucket_type* next;
00307 
00308         _table[i].~bucket_type();
00309 
00310         while( current ) {
00311           next = current->next;
00312           delete current;
00313           current = next;
00314         }
00315       }
00316     }
00317     _count = 0;
00318   }
00319 
00320   const iterator& end() {
00321     return _end;
00322   }
00323 
00324   iterator begin() {
00325     return iterator( _table, _buckets );
00326   }
00327 
00328   size_t size() {
00329     return _count;
00330   }
00331 };
00332 
00333 #endif // LEMUR_HASHTABLE_HPP

Generated on Wed Nov 3 12:58:57 2004 for Lemur Toolkit by doxygen1.2.18