00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _CLUSTERDB_HPP
00015 #define _CLUSTERDB_HPP
00016 #include <stdexcept>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "IndexedReal.hpp"
00020 #include "ClusterParam.hpp"
00021 #include "ClusterFactory.hpp"
00022 #include "SimFactory.hpp"
00023 #include "ThresholdFcn.hpp"
00024 namespace lemur
00025 {
00027 namespace api
00028 {
00029
/// Error type associated (by name) with ClusterDB.
///
/// Publicly derives from std::runtime_error, so it can be caught as
/// ClusterDBError, std::runtime_error, or std::exception; what() yields the
/// message supplied at construction. Where it is thrown is not visible in
/// this header.
///
/// The standard names are spelled with an explicit std:: qualifier so this
/// header does not depend on a using-directive leaking in from another
/// include.
class ClusterDBError : public std::runtime_error
{
public:
  /// Builds the error from a message.
  /// @param s Text forwarded to std::runtime_error; returned by what().
  ClusterDBError(const std::string& s) : std::runtime_error(s) { }
};
00036
00038
00039 class ClusterDB
00040 {
00041 public:
00043 ClusterDB(const Index *ind,
00044 double threshold = 0.25,
00045 enum ClusterParam::simTypes simType = ClusterParam::COS,
00046 enum ClusterParam::clusterTypes clusterType=ClusterParam::CENTROID,
00047 enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00048
00050 virtual ~ClusterDB();
00051
00053 virtual int countClusters() const = 0;
00054
00056 virtual int maxID() const = 0;
00057
00059 virtual vector<lemur::cluster::Cluster*> getDocCluster(lemur::api::DOCID_T docId) const = 0;
00060
00062 virtual lemur::cluster::Cluster *getCluster(int clusterId) const = 0;
00063
00065 virtual vector<int> getDocClusterId(lemur::api::DOCID_T docId) const = 0;
00066
00068 virtual int addToCluster(lemur::api::DOCID_T docId, int clusterId, double score) = 0;
00069
00071 virtual int addToCluster(lemur::api::DOCID_T docId, lemur::cluster::Cluster *cluster, double score) = 0;
00072
00074 virtual int removeFromCluster(lemur::api::DOCID_T docId, int clusterID) = 0;
00076 virtual int deleteCluster(int clusterID) = 0;
00077
00079 virtual int deleteCluster (lemur::cluster::Cluster *target) = 0;
00080
00082 int addCluster(lemur::cluster::Cluster *oldCluster);
00083
00085 virtual vector<int> splitCluster(int cid, int num = 2) = 0;
00086
00088 virtual int mergeClusters(int cid1, int cid2) = 0;
00089
00091 virtual void printClusters() const;
00092
00094 virtual int cluster(lemur::api::DOCID_T docId);
00095
00097 virtual int cluster(lemur::api::DOCID_T docId, double &finalScore);
00098
00100 virtual string getKeyWords(int cid, int numTerms = 10) const;
00101
00102 protected:
00104 const Index *index;
00106 int numDocs;
00108 int numTerms;
00110 double threshold;
00112 mutable vector<lemur::cluster::Cluster *> clusters;
00114 const lemur::cluster::SimilarityMethod *sim;
00116 lemur::cluster::ClusterFactory *factory;
00118 virtual lemur::cluster::Cluster * newCluster() = 0;
00120 lemur::cluster::Cluster* allocateCluster(int clusterID) const;
00122 lemur::cluster::ThresholdFcn *thresh;
00123 };
00124 }
00125 }
00126
00127 #endif