00001 /*========================================================================== 00002 * Copyright (c) 2002 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // David Fisher 00013 // init: 11/18/2002 00014 #ifndef _CLUSTER_HPP 00015 #define _CLUSTER_HPP 00016 #include <string> 00017 #include "common_headers.hpp" 00018 #include "Index.hpp" 00019 #include "ClusterRep.hpp" 00020 #include "SimilarityMethod.hpp" 00021 #include "ClusterParam.hpp" 00022 namespace lemur 00023 { 00024 namespace cluster 00025 { 00026 // need an enumeration of types 00027 #define DOC_ELT 1 00028 #define CLUSTER_ELT 2 00029 // maximum length of a cluster name string. 00030 #define NAMESIZE 50 00031 00032 class ClusterElt { 00033 public: 00034 bool operator==(const ClusterElt& second) const { 00035 return (myType == second.myType && id == second.id); 00036 } 00037 int id; 00038 int myType; 00039 }; 00040 00042 class Cluster { 00043 public: 00045 Cluster(int cid, const lemur::api::Index &ind, 00046 const SimilarityMethod &sim); 00048 virtual ~Cluster(); 00050 void setId(int cid); 00052 void setName(const string &newName); 00054 int getId() const; 00056 const string &getName() const; 00058 const vector <ClusterElt> *getIds() const; 00060 vector <lemur::api::DOCID_T> getDocIds() const; 00064 virtual ClusterRep *getClusterRep() const = 0; 00066 virtual double score(const ClusterRep *rep) const = 0; 00068 virtual void add(const ClusterElt &elt); 00070 virtual void add(const vector<lemur::api::DOCID_T> docids); 00072 virtual void remove(const ClusterElt &elt); 00074 virtual vector<Cluster *> split(int numParts = 2); 00076 virtual void merge(const Cluster *c); 00078 virtual bool read(ifstream &in); 00080 virtual void write(ofstream &out); 00082 void print() const ; 00084 int getSize() const { return size;} 00086 virtual double sum2() const { return 0; } 00088 virtual string getKeyWords(int numTerms = 10) const ; 00089 00090 protected: 00092 int id; 00094 string name; 00096 vector<ClusterElt> ids; 00098 const SimilarityMethod &similarity; 00100 int size; 00101 double weight; 00102 const lemur::api::Index &ind; 00103 fstream *datFile; 00104 long offset; 00105 }; 00106 } 00107 } 00108 00109 #endif