00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef LEMUR_HASHTABLE_HPP
00020 #define LEMUR_HASHTABLE_HPP
00021
00022 #include <utility>
00023 #include "indri/RegionAllocator.hpp"
00024 namespace indri
00025 {
00026 namespace utility
00027 {
00028
00029
00030
00031
00032
00033 template<class _Key>
00034 class GenericHash {
00035 public:
00036 size_t operator() ( const _Key& k ) const {
00037 return (size_t) k;
00038 }
00039 };
00040
00041
00042
00043
00044
00045 template<class _Key>
00046 class GenericComparator {
00047 public:
00048 size_t operator() ( const _Key& one, const _Key& two ) const {
00049 return (size_t) (one - two);
00050 }
00051 };
00052
00053
00054
00055
00056
00057 template<>
00058 class GenericHash<const char*> {
00059 public:
00060 size_t operator() ( const char* const& kp ) const {
00061
00062 size_t hash = 5381;
00063 const char* k = kp;
00064 char c;
00065
00066 for( ; c = *k; k++ ){
00067 hash = ((hash << 5) + hash) + c;
00068 }
00069
00070 return hash;
00071 }
00072 };
00073
00074
00075
00076
00077
00078 template<>
00079 class GenericComparator<const char*> {
00080 public:
00081 size_t operator () ( const char* const& one, const char* const& two ) const {
00082 return strcmp( one, two );
00083 }
00084 };
00085
00086
00087
00088
00089
00090
00091 template<>
00092 class GenericHash<std::string> {
00093 public:
00094 size_t operator() ( const std::string& key ) const {
00095 GenericHash<const char*> charHash;
00096 return charHash( key.c_str() );
00097 }
00098 };
00099
00100
00101
00102
00103
00104
00105 template<>
00106 class GenericComparator<std::string> {
00107 public:
00108 size_t operator() ( const std::string& one, const std::string& two ) const {
00109 return one.compare(two);
00110 }
00111 };
00112
00113
00114
00115
00116
00117 template<class _Key, class _Value>
00118 struct HashBucket {
00119 _Key key;
00120 _Value value;
00121 HashBucket<_Key, _Value>* next;
00122
00123 HashBucket() : next( 0 ) {};
00124 HashBucket( const _Key& k, HashBucket<_Key, _Value>* n ) : key(k), next(n) {};
00125 HashBucket( const _Key& k, const _Value& v, HashBucket<_Key, _Value>* n ) : key(k), value(v), next(n) {};
00126 };
00127
00128
00129
00130
00131
00132 template<class _Key, class _Value, class _Comparator>
00133 class HashTableIterator {
00134 private:
00135 typedef HashBucket<_Key, _Value> bucket_type;
00136 bucket_type** _table;
00137 bucket_type* _currentEntry;
00138 size_t _currentBucket;
00139 size_t _totalBuckets;
00140 std::pair<_Key*, _Value*> _pair;
00141
00142 void next() {
00143
00144 if( _currentBucket == (size_t)-1 )
00145 return;
00146
00147
00148 if( _currentEntry && _currentEntry->next != 0 ) {
00149 _currentEntry = _currentEntry->next;
00150 return;
00151 }
00152
00153 if( _currentEntry )
00154 _currentBucket++;
00155
00156 for( ; _currentBucket < _totalBuckets; _currentBucket++ ) {
00157 _currentEntry = _table[_currentBucket];
00158
00159 if( _currentEntry ) {
00160 return;
00161 }
00162 }
00163
00164
00165 _currentEntry = 0;
00166 _currentBucket = (size_t)-1;
00167 }
00168
00169 public:
00170 HashTableIterator() {
00171 _currentBucket = (size_t)-1;
00172 _currentEntry = 0;
00173 }
00174
00175 HashTableIterator( bucket_type** table, size_t totalBuckets ) {
00176 _table = table;
00177 _totalBuckets = totalBuckets;
00178
00179 _currentBucket = 0;
00180 _currentEntry = 0;
00181 next();
00182 }
00183
00184 bool operator == ( const HashTableIterator& other ) {
00185 if( other._currentEntry == _currentEntry &&
00186 other._currentBucket == _currentBucket )
00187 return true;
00188 return false;
00189 }
00190
00191 bool operator != ( const HashTableIterator& other ) {
00192 if( other._currentEntry == _currentEntry &&
00193 other._currentBucket == _currentBucket )
00194 return false;
00195 return true;
00196 }
00197
00198 void operator++ ( int ) {
00199 next();
00200 }
00201
00202 std::pair<_Key*, _Value*>& operator* () {
00203 _pair.first = &_currentEntry->key;
00204 _pair.second = &_currentEntry->value;
00205 return _pair;
00206 }
00207
00208 std::pair<_Key*, _Value*>* operator-> () {
00209 return &(*(*this));
00210 }
00211 };
00212
00213 template<class _Key, class _Value, class _HashFunction = GenericHash<_Key>, class _Comparator = GenericComparator<_Key> >
00214 class HashTable {
00215 public:
00216 friend class HashTableIterator<_Key, _Value, _Comparator>;
00217
00218 typedef HashBucket<_Key, _Value> bucket_type;
00219 typedef _Key key_type;
00220 typedef _Value value_type;
00221 typedef _HashFunction hash_type;
00222 typedef _Comparator compare_type;
00223 typedef class HashTableIterator<_Key, _Value, _Comparator> iterator;
00224
00225 private:
00226 indri::utility::RegionAllocator* _allocator;
00227 bucket_type** _table;
00228 hash_type _hash;
00229 compare_type _compare;
00230 size_t _buckets;
00231 iterator _end;
00232 size_t _count;
00233
00234 void _deleteBucket( bucket_type* b ) {
00235 if( !_allocator )
00236 delete b;
00237 }
00238
00239 bucket_type* _newBucket( const _Key& k, bucket_type* p ) {
00240 bucket_type* b = 0;
00241
00242 if( _allocator ) {
00243 b = (bucket_type*) _allocator->allocate( sizeof (bucket_type) );
00244 new(b) bucket_type( k, p );
00245 } else {
00246 b = new bucket_type( k, p );
00247 }
00248
00249 return b;
00250 }
00251
00252 bucket_type* _newBucket( const _Key& k, const _Value& v, bucket_type* p ) {
00253 bucket_type* b = 0;
00254
00255 if( _allocator ) {
00256 b = (bucket_type*) _allocator->allocate( sizeof (bucket_type) );
00257 new(b) bucket_type( k, v, p );
00258 } else {
00259 b = new bucket_type( k, v, p );
00260 }
00261
00262 return b;
00263 }
00264
00265 bucket_type** _parentBucket( const _Key& k ) const {
00266 size_t index = _hash(k) % _buckets;
00267 return &_table[index];
00268 }
00269
00270 public:
00271 HashTable( size_t size = 16384, indri::utility::RegionAllocator* allocator = 0 ) {
00272 _allocator = allocator;
00273 _buckets = size / sizeof(bucket_type*);
00274 _table = reinterpret_cast<bucket_type**>(new char[_buckets * sizeof(bucket_type*)]);
00275 _count = 0;
00276
00277 memset( _table, 0, _buckets * sizeof(bucket_type*) );
00278 }
00279
00280 ~HashTable() {
00281 clear();
00282 delete[] reinterpret_cast<char*>(_table);
00283 }
00284
00285 _Value* find( const _Key& k ) const {
00286 bucket_type* bucket = *_parentBucket(k);
00287
00288 for( ; bucket; bucket = bucket->next ) {
00289 if( _compare( k, bucket->key ) == 0 ) {
00290 return &bucket->value;
00291 }
00292 }
00293
00294 return 0;
00295 }
00296
00297 _Value* insert( const _Key& k ) {
00298 bucket_type** bucket = _parentBucket(k);
00299 _count++;
00300
00301
00302 while( *bucket )
00303 bucket = &(*bucket)->next;
00304
00305
00306 bucket_type* newItem = _newBucket( k, 0 );
00307 *bucket = newItem;
00308
00309 return &newItem->value;
00310 }
00311
00312 _Value* insert( const _Key& k, const _Value& v ) {
00313 bucket_type** bucket = _parentBucket(k);
00314 _count++;
00315
00316
00317 while( *bucket )
00318 bucket = &(*bucket)->next;
00319
00320
00321 bucket_type* newItem = _newBucket( k, v, 0 );
00322 *bucket = newItem;
00323
00324 return &newItem->value;
00325 }
00326
00327 void remove( const _Key& k ) {
00328 bucket_type** bucket = _parentBucket(k);
00329
00330 while( *bucket ) {
00331 if( _compare( k, (*bucket)->key ) == 0 ) {
00332 bucket_type* nextItem = (*bucket)->next;
00333 bucket_type* thisItem = (*bucket);
00334
00335 *bucket = nextItem;
00336 _deleteBucket( thisItem );
00337 _count--;
00338 break;
00339 }
00340
00341 bucket = &(*bucket)->next;
00342 }
00343 }
00344
00345 void clear() {
00346 if( _allocator ) {
00347 memset( _table, 0, sizeof(bucket_type*) * _buckets );
00348 } else {
00349 for( size_t i=0; i<_buckets; i++ ) {
00350 bucket_type* item = _table[i];
00351
00352 while( item ) {
00353 bucket_type* nextItem = item->next;
00354 _deleteBucket( item );
00355 item = nextItem;
00356 }
00357
00358 _table[i] = 0;
00359 }
00360 }
00361
00362 _count = 0;
00363 }
00364
00365 const iterator& end() {
00366 return _end;
00367 }
00368
00369 iterator begin() {
00370 return iterator( _table, _buckets );
00371 }
00372
00373 size_t size() {
00374 return _count;
00375 }
00376 };
00377 }
00378 }
00379
00380 #endif // LEMUR_HASHTABLE_HPP