00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // ------------------------------------------------------------------- 00014 // PSet.hpp (Plain set) 00015 // ALB 00016 // 00017 // Template set class (base class of family of set classes) 00018 // . add objects using operator+() or add() 00019 // . remove objects using operator-() or remove() 00020 // . query for the presence of an object in set via operator[object] 00021 // . if user specifies no removals will occur, uses (efficient) block memory 00022 // 00023 // Required member functions of template ObjType: 00024 // hash(): returns a 32-bit hash of object 00025 // ==: returns 1 if objects are equal 00026 // =: assignment operator 00027 // 00028 // Note also: ISet (set with index) 00029 // ------------------------------------------------------------------- 00030 00031 #ifndef _PSETH_ 00032 #define _PSETH_ 00033 00034 #include "common_headers.hpp" 00035 #include <cmath> 00036 #include <memory.h> 00037 #include <cassert> 00038 00039 static const float SPARSENESS = 1.5; 00040 static const float GROW_FACTOR = 2.0; 00041 00042 00043 template <class ObjType> 00044 class PSet 00045 { 00046 protected: 00047 struct SET_NODE { 00048 ObjType u; 00049 int idx; // used in derived classes, but not here 00050 struct SET_NODE *next; 00051 } ; 00052 00053 public: 00054 PSet(): currentSize(0),maxSize(0),hashTable(0) {} 00055 PSet(const int maxSize_p) : currentSize(0),hashTable(0) { open(maxSize_p); } 00056 ~PSet() { close(); } 00057 00058 const int size() const { return currentSize; } 00059 const int maxsize() const { return maxSize; } 00060 00061 void open(const int maxSize_p) { 00062 assert(hashTable==0); // cannot call open() twice! 00063 assert(maxSize_p>0); 00064 maxSize = maxSize_p; 00065 if (maxSize<2) maxSize=2; 00066 hashTableSize = smallestPrimeGreaterThan((int) (maxSize*SPARSENESS)); 00067 hashTable = new SET_NODE * [hashTableSize]; 00068 memset(hashTable, 0, hashTableSize*sizeof(SET_NODE *)); 00069 setNodeSize = sizeof(SET_NODE); 00070 } 00071 00072 void close() { 00073 if (hashTable) { 00074 for (int i=0; i<hashTableSize; i++) { 00075 SET_NODE *node=hashTable[i]; 00076 while (node) { 00077 SET_NODE *next = node->next; 00078 delete node; 00079 node = next; 00080 } 00081 } 00082 delete [] hashTable; 00083 } 00084 hashTable=0; 00085 currentSize=0; 00086 } 00087 00088 void clear() { 00089 if (maxSize==0) return; 00090 close(); 00091 open(maxSize); 00092 } 00093 00094 int add(const ObjType& u) { 00095 SET_NODE *sn = internalAdd(u); 00096 if (sn==0) return -1; // already a member 00097 assert(currentSize++<=maxSize); 00098 return sn->idx; // new member 00099 } 00100 00101 int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set 00102 const int idx = internalRemove(u); 00103 if (idx==-1) return 0; // not a member 00104 currentSize--; 00105 return 1; // was a member (not anymore) 00106 } 00107 00108 int operator+=(const ObjType& u) // add an elt to set: returns 1 if added. 00109 { return add(u); } 00110 00111 int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed. 00112 { return remove(u); } 00113 00114 const ObjType &operator[](const int idx) const { 00115 int a; 00116 a=idx; // suppress compiler warnings about not using idx 00117 cerr <<"PSet: operator[] (int) not supported!"<<endl; 00118 abort(); 00119 } 00120 00121 int operator[] (const ObjType& u) const { // returns +1 if found in set, -1 o/w 00122 int hashval = computeHash(u); 00123 SET_NODE *p = hashTable[hashval]; 00124 while(p!=0 && !(p->u==u)) p=p->next; 00125 return ((p==0)? -1: 1); 00126 } 00127 00128 00129 protected: 00130 SET_NODE *internalAdd(const ObjType &u) { 00131 int hashval = computeHash(u); 00132 00133 // look for this object in the appropriate linked list 00134 for (SET_NODE* p=hashTable[hashval]; p!=0; p=p->next) 00135 if (p->u==u) return 0; // already a member 00136 00137 // create new node and stick at head of linked list 00138 SET_NODE *sn = createNode(u); 00139 sn->idx = currentSize; 00140 sn->next = hashTable[hashval]; 00141 hashTable[hashval] = sn; 00142 return sn; 00143 } 00144 00145 int internalRemove(const ObjType &u) { 00146 // returns index of extracted object, or -1 if not there. 00147 int hashval = computeHash(u); 00148 SET_NODE* sn = hashTable[hashval]; 00149 SET_NODE** pLast = &(hashTable[hashval]); 00150 while (sn) { 00151 if (sn->u == u) { 00152 int idx = sn->idx; 00153 *pLast = sn->next; 00154 deleteNode(sn); 00155 return idx; 00156 } 00157 pLast = &(sn->next); 00158 sn = sn->next; 00159 } 00160 return -1; 00161 } 00162 00163 long smallestPrimeGreaterThan(const int n) { 00164 for (int i=n+1;; i++) { 00165 // look for a j<sqrt(i) which divides evenly into i 00166 int s = (int) sqrt((float) i); 00167 int j; 00168 for (j=2; j<=s; j++) 00169 if ((i % j)==0) break; 00170 if (j>s) return i; 00171 } 00172 } 00173 00174 protected: 00175 int computeHash(const ObjType &u) const { 00176 int h = u.hash() % hashTableSize; 00177 if (h<0) h*=-1; 00178 return h; 00179 } 00180 00181 // memory management of nodes 00182 SET_NODE *createNode(const ObjType &u) { 00183 SET_NODE *n = new SET_NODE; 00184 n->u = u; 00185 return n; 00186 } 00187 00188 void deleteNode(SET_NODE *node) { delete node; } 00189 00190 protected: 00191 int currentSize; // # of elts currently in set 00192 int maxSize; // # of elts allowable in set 00193 int hashTableSize; // size of hash table containing set 00194 SET_NODE** hashTable; 00195 int setNodeSize; 00196 }; 00197 00198 #endif 00199 00200 00201