Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

PSet.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 // -------------------------------------------------------------------
00014 // PSet.hpp (Plain set)
00015 // ALB
00016 //
00017 // Template set class (base class of family of set classes)
00018 //    . add objects using operator+() or add()
00019 //    . remove objects using operator-() or remove()
00020 //    . query for the presence of an object in set via operator[object]
00021 //    . if user specifies no removals will occur, uses (efficient) block memory
00022 //
00023 // Required member functions of template ObjType:
00024 //     hash(): returns a 32-bit hash of object
00025 //         ==: returns 1 if objects are equal
00026 //          =: assignment operator
00027 // 
00028 // Note also: ISet (set with index)
00029 // -------------------------------------------------------------------
00030 
00031 #ifndef _PSETH_
00032 #define _PSETH_
00033 
00034 #include "common_headers.hpp"
00035 #include <cmath>
00036 #include <memory.h>
00037 #include <cassert>
00038 
00039 static const float SPARSENESS = 1.5;
00040 static const float GROW_FACTOR = 2.0;
00041 
00042 
00043 template <class ObjType>
00044 class PSet
00045 {
00046 protected:
00047   struct SET_NODE {
00048     ObjType          u;
00049     int              idx;           // used in derived classes, but not here
00050     struct SET_NODE *next;
00051   } ;
00052     
00053 public:
00054   PSet(): currentSize(0),maxSize(0),hashTable(0) {}
00055   PSet(const int maxSize_p) : currentSize(0),hashTable(0) { open(maxSize_p); }
00056   ~PSet() { close(); }
00057   
00058   const int size()    const { return currentSize; }
00059   const int maxsize() const { return maxSize;     }
00060 
00061   void open(const int maxSize_p) {
00062     assert(hashTable==0);   // cannot call open() twice!
00063     assert(maxSize_p>0); 
00064     maxSize = maxSize_p;
00065     if (maxSize<2) maxSize=2;
00066     hashTableSize = smallestPrimeGreaterThan((int) (maxSize*SPARSENESS));
00067     hashTable = new SET_NODE * [hashTableSize];
00068     memset(hashTable, 0, hashTableSize*sizeof(SET_NODE *));
00069     setNodeSize = sizeof(SET_NODE);
00070   }
00071   
00072   void close() {
00073     if (hashTable) {
00074       for (int i=0; i<hashTableSize; i++) {
00075         SET_NODE *node=hashTable[i]; 
00076         while (node) {
00077           SET_NODE *next = node->next; 
00078           delete node;
00079           node = next;
00080         }
00081       }
00082       delete [] hashTable; 
00083     }
00084     hashTable=0;
00085     currentSize=0;
00086   }
00087   
00088   void clear() {
00089     if (maxSize==0) return;
00090     close();
00091     open(maxSize);
00092   }
00093   
00094   int add(const ObjType& u) {
00095     SET_NODE *sn = internalAdd(u);
00096     if (sn==0) return -1;                       // already a member
00097     assert(currentSize++<=maxSize);   
00098     return sn->idx;                            // new member
00099   }
00100 
00101   int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set
00102     const int idx = internalRemove(u);
00103     if (idx==-1) return 0;                 // not a member
00104     currentSize--;
00105     return 1;                              // was a member (not anymore)
00106   }
00107   
00108   int operator+=(const ObjType& u)  // add an elt to set: returns 1 if added. 
00109   { return add(u); }
00110   
00111   int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed.
00112   { return remove(u); }
00113   
00114   const ObjType &operator[](const int idx) const {
00115     int a;
00116     a=idx; // suppress compiler warnings about not using idx
00117     cerr <<"PSet: operator[] (int) not supported!"<<endl;
00118     abort();
00119   }
00120 
00121   int operator[] (const ObjType& u) const { // returns +1 if found in set, -1 o/w
00122     int hashval = computeHash(u);
00123     SET_NODE *p = hashTable[hashval];
00124     while(p!=0 && !(p->u==u)) p=p->next;
00125     return ((p==0)? -1: 1);
00126   }
00127   
00128 
00129 protected:
00130   SET_NODE *internalAdd(const ObjType &u) {
00131     int hashval = computeHash(u);
00132 
00133     // look for this object in the appropriate linked list
00134     for (SET_NODE* p=hashTable[hashval]; p!=0; p=p->next)
00135       if (p->u==u) return 0;                 // already a member
00136     
00137     // create new node and stick at head of linked list
00138     SET_NODE *sn = createNode(u);
00139     sn->idx  = currentSize;
00140     sn->next = hashTable[hashval];
00141     hashTable[hashval] = sn;    
00142     return sn;
00143   }
00144   
00145   int internalRemove(const ObjType &u) {
00146     // returns index of extracted object, or -1 if not there.
00147     int hashval = computeHash(u);
00148     SET_NODE* sn = hashTable[hashval];
00149     SET_NODE** pLast = &(hashTable[hashval]);
00150     while (sn) {
00151       if (sn->u == u) {
00152         int idx = sn->idx;
00153         *pLast = sn->next;
00154         deleteNode(sn);
00155         return idx;
00156       }
00157       pLast = &(sn->next);
00158       sn = sn->next;
00159     }
00160     return -1;
00161   }
00162   
00163   long smallestPrimeGreaterThan(const int n) {
00164     for (int i=n+1;; i++) {   
00165       // look for a j<sqrt(i) which divides evenly into i
00166       int s = (int) sqrt((float) i);
00167       int j;
00168       for (j=2; j<=s; j++)
00169         if ((i % j)==0) break; 
00170       if (j>s) return i;
00171     }
00172   }
00173   
00174 protected:
00175   int computeHash(const ObjType &u) const {
00176     int h = u.hash() % hashTableSize;
00177     if (h<0) h*=-1;
00178     return h;
00179   }
00180   
00181   // memory management of nodes
00182   SET_NODE *createNode(const ObjType &u) {
00183     SET_NODE *n = new SET_NODE;
00184     n->u = u;              
00185     return n;
00186   }
00187   
00188  void deleteNode(SET_NODE *node) { delete node; }
00189   
00190 protected:
00191   int currentSize;                    // # of elts currently in set 
00192   int maxSize;                        // # of elts allowable in set 
00193   int hashTableSize;              // size of hash table containing set  
00194   SET_NODE** hashTable;
00195   int setNodeSize;
00196 };
00197 
00198 #endif
00199 
00200 
00201 

Generated on Wed Nov 3 12:59:01 2004 for Lemur Toolkit by doxygen1.2.18