00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _CLUSTERDB_HPP
00015 #define _CLUSTERDB_HPP
00016 #include <stdexcept>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "IndexedReal.hpp"
00020 #include "ClusterParam.hpp"
00021 #include "ClusterFactory.hpp"
00022 #include "SimFactory.hpp"
00023 #include "ThresholdFcn.hpp"
00024
/// Exception type associated with ClusterDB.
///
/// It adds nothing to std::runtime_error beyond a distinct type that callers
/// can catch; the message given to the constructor is returned by what().
/// The standard names are qualified explicitly so this header does not
/// depend on a using-directive supplied by an earlier include.
class ClusterDBError : public std::runtime_error
{
public:
  /// Build the exception from a message.
  /// @param s Description of the failure, forwarded unchanged to
  ///          std::runtime_error.
  ClusterDBError(const std::string& s) : std::runtime_error(s) { }
};
00031
00033
00034 class ClusterDB
00035 {
00036 public:
00038 ClusterDB(const Index *ind,
00039 double threshold = 0.25,
00040 enum ClusterParam::simTypes simType = ClusterParam::COS,
00041 enum ClusterParam::clusterTypes clusterType=ClusterParam::CENTROID,
00042 enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00043
00045 virtual ~ClusterDB();
00046
00048 virtual int countClusters() const = 0;
00049
00051 virtual int maxID() const = 0;
00052
00054 virtual vector<Cluster*> getDocCluster(DOCID_T docId) const = 0;
00055
00057 virtual Cluster *getCluster(int clusterId) const = 0;
00058
00060 virtual vector<int> getDocClusterId(DOCID_T docId) const = 0;
00061
00063 virtual int addToCluster(DOCID_T docId, int clusterId, double score) = 0;
00064
00066 virtual int addToCluster(DOCID_T docId, Cluster *cluster, double score) = 0;
00067
00069 virtual int removeFromCluster(DOCID_T docId, int clusterID) = 0;
00071 virtual int deleteCluster(int clusterID) = 0;
00072
00074 virtual int deleteCluster (Cluster *target) = 0;
00075
00077 int addCluster(Cluster *oldCluster);
00078
00080 virtual vector<int> splitCluster(int cid, int num = 2) = 0;
00081
00083 virtual int mergeClusters(int cid1, int cid2) = 0;
00084
00086 virtual void printClusters() const;
00087
00089 virtual int cluster(DOCID_T docId);
00090
00092 virtual int cluster(DOCID_T docId, double &finalScore);
00093
00095 virtual string getKeyWords(int cid, int numTerms = 10) const;
00096
00097 protected:
00099 const Index *index;
00101 int numDocs;
00103 int numTerms;
00105 double threshold;
00107
00109
00111
00113
00115
00117
00118 };
00119 #endif