00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // ------------------------------------------------------------------- 00014 // ISet.hpp (set with index) 00015 // ALB 00016 // Template set class derived from PSet.hpp 00017 // . retrieve objects in the order in which they were inserted 00018 // via operator[int] 00019 // . growable 00020 // ------------------------------------------------------------------- 00021 00022 #ifndef _ISETH_ 00023 #define _ISETH_ 00024 00025 #include "PSet.hpp" 00026 00027 namespace lemur 00028 { 00029 namespace utility 00030 { 00031 00032 template <class ObjType> 00033 class ISet : public PSet<ObjType> 00034 { 00035 public: 00036 ISet(): PSet<ObjType>(), index(0) {} 00037 ISet(const int maxSize_p): index(0) { open(maxSize_p); } 00038 ~ISet() { close(); } 00039 00040 void open(const int maxSize_p) { 00041 PSet<ObjType>::open(maxSize_p); 00042 index = new typename PSet<ObjType>::SET_NODE* [this->maxSize+1]; 00043 memset(index, 0, (this->maxSize+1)*sizeof(typename PSet<ObjType>::SET_NODE*)); 00044 } 00045 00046 void close() { 00047 PSet<ObjType>::close(); 00048 delete [] index; 00049 index=0; 00050 } 00051 00052 void clear() { 00053 if (this->maxSize==0) return; 00054 close(); 00055 open(this->maxSize); 00056 } 00057 00058 int size() const { return this->currentSize; } 00059 00060 int add(const ObjType& u) { 00061 typename PSet<ObjType>::SET_NODE *sn = PSet<ObjType>::internalAdd(u); 00062 if (sn==0) return -1; 00063 index[sn->idx] = sn; 00064 int retval = sn->idx; 00065 if (++this->currentSize > this->maxSize) grow((int) (this->currentSize*GROW_FACTOR+1)); 00066 return retval; // grow may void sn 00067 } 00068 00069 int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set 00070 const int idx = internalRemove(u); 00071 if (idx==-1) return 0; // not a member 00072 this->currentSize--; 00073 return 1; // was a member (not anymore) 00074 } 00075 00076 int operator+=(const ObjType& u) // add an elt to set: returns 1 if added. 00077 { return add(u); } 00078 00079 int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed. 00080 { return remove(u); } 00081 00082 // NB: When user removes elts. from set, the set is re-indexed, so 00083 // what is the n'th elt. now may be the n-m'th elt. sometime later 00084 ObjType& operator[](const int idx) const { // get n'th elt 00085 assert(idx<this->currentSize); 00086 return index[idx]->u; 00087 } 00088 00089 int operator[](const ObjType& u) const { // get idx of u, -1 if not there 00090 int hashval = computeHash(u); 00091 typename PSet<ObjType>::SET_NODE *p = this->hashTable[hashval]; 00092 while(p!=0 && !(p->u==u)) p=p->next; 00093 return ((p==0)? -1: p->idx); 00094 } 00095 00096 void grow(const int newSize) { 00097 this->maxSize = newSize; 00098 this->hashTableSize = this->smallestPrimeGreaterThan((int) (this->maxSize*SPARSENESS)); 00099 typename PSet<ObjType>::SET_NODE **newIndex = new typename PSet<ObjType>::SET_NODE* [this->maxSize+1]; 00100 typename PSet<ObjType>::SET_NODE **newHashTable = new typename PSet<ObjType>::SET_NODE* [this->hashTableSize]; 00101 memset(newHashTable, 0, this->hashTableSize*sizeof(typename PSet<ObjType>::SET_NODE *)); 00102 for (int i=0; i<this->currentSize; i++) { 00103 typename PSet<ObjType>::SET_NODE *sn = index[i]; 00104 const int hashval = computeHash(sn->u); 00105 typename PSet<ObjType>::SET_NODE *snNew = createNode(sn->u); 00106 snNew->idx = i; 00107 snNew->next = newHashTable[hashval]; 00108 newHashTable[hashval] = snNew; 00109 deleteNode(sn); 00110 newIndex[i] = snNew; 00111 } 00112 delete [] index; 00113 delete [] this->hashTable; 00114 index = newIndex; 00115 this->hashTable = newHashTable; 00116 } 00117 00118 protected: 00119 int internalRemove(const ObjType &u) { 00120 // Return idx of removed node. For efficiency, we re-index the set 00121 // so that what once was the last member now is the idx'th member. 00122 // (rather than renumbering the entire set starting from idx) 00123 int idx=PSet<ObjType>::internalRemove(u); 00124 if (idx==-1) return -1; 00125 index[idx] = index[this->currentSize-1]; 00126 index[idx]->idx = idx; 00127 index[this->currentSize-1] = 0; 00128 return idx; 00129 } 00130 00131 int internalRemove(const ObjType &u, const int idx) { 00132 PSet<ObjType>::internalRemove(u); 00133 index[idx] = index[this->currentSize-1]; 00134 if (index[idx]->idx != -2) index[idx]->idx=idx; 00135 index[this->currentSize-1] = 0; 00136 return idx; 00137 } 00138 00139 protected: 00140 typename PSet<ObjType>::SET_NODE* (*index); // goes from [dense idx] -> node 00141 }; 00142 } 00143 } 00144 00145 #endif