Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

ClusterDB.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 // David Fisher, Peter Amstutz
00013 // init: 11/18/2002
00014 #ifndef _CLUSTERDB_HPP
00015 #define _CLUSTERDB_HPP
00016 #include <stdexcept>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "IndexedReal.hpp"
00020 #include "ClusterParam.hpp"
00021 #include "ClusterFactory.hpp"
00022 #include "SimFactory.hpp"
00023 #include "ThresholdFcn.hpp"
00024 
/// Exception type associated with the ClusterDB interface.
///
/// It derives publicly from std::runtime_error, so a handler for
/// std::runtime_error (or std::exception) also catches it, and what()
/// yields the message given to the constructor.
/// The standard names are spelled with an explicit std:: prefix so this
/// header does not depend on a using-directive leaking in from another
/// header.
/// NOTE(review): the throw sites are not part of this header -- confirm
/// which operations actually raise it.
class ClusterDBError : public std::runtime_error
{
public:
    /// Build the exception from a message.
    /// @param s text that what() will report.
    ClusterDBError(const std::string& s) : std::runtime_error(s) { }
};
00031 
00033 
00034 class ClusterDB
00035 {
00036 public:
00038   ClusterDB(const Index *ind, 
00039             double threshold = 0.25,
00040             enum ClusterParam::simTypes simType = ClusterParam::COS,
00041             enum ClusterParam::clusterTypes clusterType=ClusterParam::CENTROID,
00042             enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00043 
00045   virtual ~ClusterDB();
00046 
00048   virtual int countClusters() const = 0;
00049 
00051   virtual int maxID() const = 0;
00052 
00054   virtual vector<Cluster*> getDocCluster(DOCID_T docId) const = 0;
00055 
00057   virtual Cluster *getCluster(int clusterId) const = 0;
00058 
00060   virtual vector<int> getDocClusterId(DOCID_T docId) const = 0;
00061 
00063   virtual int addToCluster(DOCID_T docId, int clusterId, double score) = 0;
00064 
00066   virtual int addToCluster(DOCID_T docId, Cluster *cluster, double score) = 0;
00067 
00069   virtual int removeFromCluster(DOCID_T docId, int clusterID) = 0;
00071   virtual int deleteCluster(int clusterID) = 0;
00072 
00074   virtual int deleteCluster (Cluster *target) = 0;
00075 
00077   int addCluster(Cluster *oldCluster);
00078 
00080   virtual vector<int> splitCluster(int cid, int num = 2) = 0;
00081 
00083   virtual int mergeClusters(int cid1, int cid2) = 0;
00084 
00086   virtual void printClusters() const;
00087 
00089   virtual int cluster(DOCID_T docId);
00090 
00092   virtual int cluster(DOCID_T docId, double &finalScore);
00093 
00095   virtual string getKeyWords(int cid, int numTerms = 10) const;
00096 
00097 protected:
00099   const Index *index;
00101   int numDocs;
00103   int numTerms;
00105   double threshold; // fix this.
00107   mutable vector<Cluster *> clusters;
00109   const SimilarityMethod *sim;
00111   ClusterFactory *factory;
00113   virtual Cluster * newCluster() = 0;
00115   Cluster* allocateCluster(int clusterID) const;
00117   ThresholdFcn *thresh;
00118 };
00119 #endif

Generated on Wed Nov 3 12:58:52 2004 for Lemur Toolkit by doxygen 1.2.18