00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // ------------------------------------------------------------------- 00014 // PSet.hpp (Plain set) 00015 // ALB 00016 // 00017 // Template set class (base class of family of set classes) 00018 // . add objects using operator+() or add() 00019 // . remove objects using operator-() or remove() 00020 // . query for the presence of an object in set via operator[object] 00021 // . if user specifies no removals will occur, uses (efficient) block memory 00022 // 00023 // Required member functions of template ObjType: 00024 // hash(): returns a 32-bit hash of object 00025 // ==: returns 1 if objects are equal 00026 // =: assignment operator 00027 // 00028 // Note also: ISet (set with index) 00029 // ------------------------------------------------------------------- 00030 00031 #ifndef _PSETH_ 00032 #define _PSETH_ 00033 00034 #include "common_headers.hpp" 00035 #include <cmath> 00036 #include <memory.h> 00037 #include <cassert> 00038 namespace lemur 00039 { 00040 namespace utility 00041 { 00042 00043 static const float SPARSENESS = 1.5; 00044 static const float GROW_FACTOR = 2.0; 00045 00046 00047 template <class ObjType> 00048 class PSet 00049 { 00050 protected: 00051 struct SET_NODE { 00052 ObjType u; 00053 int idx; // used in derived classes, but not here 00054 struct SET_NODE *next; 00055 } ; 00056 00057 public: 00058 PSet(): currentSize(0),maxSize(0),hashTable(0) {} 00059 PSet(const int maxSize_p) : currentSize(0),hashTable(0) { open(maxSize_p); } 00060 ~PSet() { close(); } 00061 00062 const int size() const { return currentSize; } 00063 const int maxsize() const { return maxSize; } 00064 00065 void open(const int maxSize_p) { 00066 assert(hashTable==0); // cannot call open() twice! 00067 assert(maxSize_p>0); 00068 maxSize = maxSize_p; 00069 if (maxSize<2) maxSize=2; 00070 hashTableSize = smallestPrimeGreaterThan((int) (maxSize*SPARSENESS)); 00071 hashTable = new SET_NODE * [hashTableSize]; 00072 memset(hashTable, 0, hashTableSize*sizeof(SET_NODE *)); 00073 setNodeSize = sizeof(SET_NODE); 00074 } 00075 00076 void close() { 00077 if (hashTable) { 00078 for (int i=0; i<hashTableSize; i++) { 00079 SET_NODE *node=hashTable[i]; 00080 while (node) { 00081 SET_NODE *next = node->next; 00082 delete node; 00083 node = next; 00084 } 00085 } 00086 delete [] hashTable; 00087 } 00088 hashTable=0; 00089 currentSize=0; 00090 } 00091 00092 void clear() { 00093 if (maxSize==0) return; 00094 close(); 00095 open(maxSize); 00096 } 00097 00098 int add(const ObjType& u) { 00099 SET_NODE *sn = internalAdd(u); 00100 if (sn==0) return -1; // already a member 00101 assert(currentSize++<=maxSize); 00102 return sn->idx; // new member 00103 } 00104 00105 int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set 00106 const int idx = internalRemove(u); 00107 if (idx==-1) return 0; // not a member 00108 currentSize--; 00109 return 1; // was a member (not anymore) 00110 } 00111 00112 int operator+=(const ObjType& u) // add an elt to set: returns 1 if added. 00113 { return add(u); } 00114 00115 int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed. 00116 { return remove(u); } 00117 00118 const ObjType &operator[](const int idx) const { 00119 int a; 00120 a=idx; // suppress compiler warnings about not using idx 00121 cerr <<"PSet: operator[] (int) not supported!"<<endl; 00122 abort(); 00123 } 00124 00125 int operator[] (const ObjType& u) const { // returns +1 if found in set, -1 o/w 00126 int hashval = computeHash(u); 00127 SET_NODE *p = hashTable[hashval]; 00128 while(p!=0 && !(p->u==u)) p=p->next; 00129 return ((p==0)? -1: 1); 00130 } 00131 00132 00133 protected: 00134 SET_NODE *internalAdd(const ObjType &u) { 00135 int hashval = computeHash(u); 00136 00137 // look for this object in the appropriate linked list 00138 for (SET_NODE* p=hashTable[hashval]; p!=0; p=p->next) 00139 if (p->u==u) return 0; // already a member 00140 00141 // create new node and stick at head of linked list 00142 SET_NODE *sn = createNode(u); 00143 sn->idx = currentSize; 00144 sn->next = hashTable[hashval]; 00145 hashTable[hashval] = sn; 00146 return sn; 00147 } 00148 00149 int internalRemove(const ObjType &u) { 00150 // returns index of extracted object, or -1 if not there. 00151 int hashval = computeHash(u); 00152 SET_NODE* sn = hashTable[hashval]; 00153 SET_NODE** pLast = &(hashTable[hashval]); 00154 while (sn) { 00155 if (sn->u == u) { 00156 int idx = sn->idx; 00157 *pLast = sn->next; 00158 deleteNode(sn); 00159 return idx; 00160 } 00161 pLast = &(sn->next); 00162 sn = sn->next; 00163 } 00164 return -1; 00165 } 00166 00167 long smallestPrimeGreaterThan(const int n) { 00168 for (int i=n+1;; i++) { 00169 // look for a j<sqrt(i) which divides evenly into i 00170 int s = (int) sqrt((float) i); 00171 int j; 00172 for (j=2; j<=s; j++) 00173 if ((i % j)==0) break; 00174 if (j>s) return i; 00175 } 00176 } 00177 00178 protected: 00179 int computeHash(const ObjType &u) const { 00180 int h = u.hash() % hashTableSize; 00181 if (h<0) h*=-1; 00182 return h; 00183 } 00184 00185 // memory management of nodes 00186 SET_NODE *createNode(const ObjType &u) { 00187 SET_NODE *n = new SET_NODE; 00188 n->u = u; 00189 return n; 00190 } 00191 00192 void deleteNode(SET_NODE *node) { delete node; } 00193 00194 protected: 00195 int currentSize; // # of elts currently in set 00196 int maxSize; // # of elts allowable in set 00197 int hashTableSize; // size of hash table containing set 00198 SET_NODE** hashTable; 00199 int setNodeSize; 00200 }; 00201 } 00202 } 00203 00204 #endif 00205 00206 00207