00001 /*========================================================================== 00002 * Copyright (c) 2002 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // David Fisher 00013 // init: 11/18/2002 00014 #ifndef _CLUSTER_HPP 00015 #define _CLUSTER_HPP 00016 #include <string> 00017 #include "common_headers.hpp" 00018 #include "Index.hpp" 00019 #include "ClusterRep.hpp" 00020 #include "SimilarityMethod.hpp" 00021 #include "ClusterParam.hpp" 00024 #define DOC_ELT 1 00025 #define CLUSTER_ELT 2 00026 00027 #define NAMESIZE 50 00028 00029 class ClusterElt { 00030 public: 00031 bool operator==(const ClusterElt& second) const { 00032 return (myType == second.myType && id == second.id); 00033 } 00034 int id; 00035 int myType; 00036 }; 00037 00039 class Cluster { 00040 public: 00042 Cluster(int cid, const Index &ind, const SimilarityMethod &sim); 00044 virtual ~Cluster(); 00046 void setId(int cid); 00048 void setName(const string &newName); 00050 int getId() const; 00052 const string &getName() const; 00054 const vector <ClusterElt> *getIds() const; 00056 vector <DOCID_T> getDocIds() const; 00060 virtual ClusterRep *getClusterRep() const = 0; 00062 virtual double score(const ClusterRep *rep) const = 0; 00064 virtual void add(const ClusterElt &elt); 00066 virtual void add(const vector<DOCID_T> docids); 00068 virtual void remove(const ClusterElt &elt); 00070 virtual vector<Cluster *> split(int numParts = 2); 00072 virtual void merge(const Cluster *c); 00074 virtual bool read(ifstream &in); 00076 virtual void write(ofstream &out); 00078 void print() const ; 00080 int getSize() const { return size;} 00082 virtual double sum2() const { return 0; } 00084 virtual string getKeyWords(int numTerms = 10) const ; 00085 00086 protected: 00088 int id; 00090 string name; 00092 vector<ClusterElt> ids; 00093 const SimilarityMethod &similarity; 00094 int size; 00095 double weight; 00096 const Index &ind; 00097 fstream *datFile; 00098 long offset; 00099 }; 00100 #endif