00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // ------------------------------------------------------------------- 00014 // ISet.hpp (set with index) 00015 // ALB 00016 // Template set class derived from PSet.hpp 00017 // . retrieve objects in the order in which they were inserted 00018 // via operator[int] 00019 // . growable 00020 // ------------------------------------------------------------------- 00021 00022 #ifndef _ISETH_ 00023 #define _ISETH_ 00024 00025 #include "PSet.hpp" 00026 00027 00028 template <class ObjType> 00029 class ISet : public PSet<ObjType> 00030 { 00031 public: 00032 ISet(): PSet<ObjType>(), index(0) {} 00033 ISet(const int maxSize_p): index(0) { open(maxSize_p); } 00034 ~ISet() { close(); } 00035 00036 void open(const int maxSize_p) { 00037 PSet<ObjType>::open(maxSize_p); 00038 index = new typename PSet<ObjType>::SET_NODE* [maxSize+1]; 00039 memset(index, 0, (maxSize+1)*sizeof(typename PSet<ObjType>::SET_NODE*)); 00040 } 00041 00042 void close() { 00043 PSet<ObjType>::close(); 00044 delete [] index; 00045 index=0; 00046 } 00047 00048 void clear() { 00049 if (maxSize==0) return; 00050 close(); 00051 open(maxSize); 00052 } 00053 00054 int size() const { return currentSize; } 00055 00056 int add(const ObjType& u) { 00057 typename PSet<ObjType>::SET_NODE *sn = PSet<ObjType>::internalAdd(u); 00058 if (sn==0) return -1; 00059 index[sn->idx] = sn; 00060 if (++currentSize > maxSize) grow((int) (currentSize*GROW_FACTOR+1)); 00061 return sn->idx; 00062 } 00063 00064 int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set 00065 const int idx = internalRemove(u); 00066 if (idx==-1) return 0; // not a member 00067 currentSize--; 00068 return 1; // was a member (not anymore) 00069 } 00070 00071 int operator+=(const ObjType& u) // add an elt to set: returns 1 if added. 00072 { return add(u); } 00073 00074 int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed. 00075 { return remove(u); } 00076 00077 // NB: When user removes elts. from set, the set is re-indexed, so 00078 // what is the n'th elt. now may be the n-m'th elt. sometime later 00079 ObjType& operator[](const int idx) const { // get n'th elt 00080 assert(idx<currentSize); 00081 return index[idx]->u; 00082 } 00083 00084 int operator[](const ObjType& u) const { // get idx of u, -1 if not there 00085 int hashval = computeHash(u); 00086 typename PSet<ObjType>::SET_NODE *p = hashTable[hashval]; 00087 while(p!=0 && !(p->u==u)) p=p->next; 00088 return ((p==0)? -1: p->idx); 00089 } 00090 00091 void grow(const int newSize) { 00092 maxSize = newSize; 00093 hashTableSize = smallestPrimeGreaterThan((int) (maxSize*SPARSENESS)); 00094 typename PSet<ObjType>::SET_NODE **newIndex = new typename PSet<ObjType>::SET_NODE* [maxSize+1]; 00095 typename PSet<ObjType>::SET_NODE **newHashTable = new typename PSet<ObjType>::SET_NODE* [hashTableSize]; 00096 memset(newHashTable, 0, hashTableSize*sizeof(typename PSet<ObjType>::SET_NODE *)); 00097 for (int i=0; i<currentSize; i++) { 00098 typename PSet<ObjType>::SET_NODE *sn = index[i]; 00099 const int hashval = computeHash(sn->u); 00100 typename PSet<ObjType>::SET_NODE *snNew = createNode(sn->u); 00101 snNew->idx = i; 00102 snNew->next = newHashTable[hashval]; 00103 newHashTable[hashval] = snNew; 00104 deleteNode(sn); 00105 newIndex[i] = snNew; 00106 } 00107 delete [] index; 00108 delete [] hashTable; 00109 index = newIndex; 00110 hashTable = newHashTable; 00111 } 00112 00113 protected: 00114 int internalRemove(const ObjType &u) { 00115 // Return idx of removed node. For efficiency, we re-index the set 00116 // so that what once was the last member now is the idx'th member. 00117 // (rather than renumbering the entire set starting from idx) 00118 int idx=PSet<ObjType>::internalRemove(u); 00119 if (idx==-1) return -1; 00120 index[idx] = index[currentSize-1]; 00121 index[idx]->idx = idx; 00122 index[currentSize-1] = 0; 00123 return idx; 00124 } 00125 00126 int internalRemove(const ObjType &u, const int idx) { 00127 PSet<ObjType>::internalRemove(u); 00128 index[idx] = index[currentSize-1]; 00129 if (index[idx]->idx != -2) index[idx]->idx=idx; 00130 index[currentSize-1] = 0; 00131 return idx; 00132 } 00133 00134 protected: 00135 typename PSet<ObjType>::SET_NODE* (*index); // goes from [dense idx] -> node 00136 }; 00137 00138 #endif 00139 00140 00141 00142 00143 00144