00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _OFFLINECLUSTER_HPP
00015 #define _OFFLINECLUSTER_HPP
00016 #include <set>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "ClusterParam.hpp"
00020 #include "ClusterFactory.hpp"
00021 #include "SimFactory.hpp"
00022 namespace lemur
00023 {
00024 namespace cluster
00025 {
00026
00028 class OfflineCluster
00029 {
00030 public:
00032 OfflineCluster(const lemur::api::Index &ind,
00033 enum ClusterParam::simTypes simType = ClusterParam::COS,
00034 enum ClusterParam::clusterTypes clusterType = ClusterParam::CENTROID,
00035 enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00036
00038 ~OfflineCluster();
00039
00042 vector<Cluster*> *kMeans(vector<lemur::api::DOCID_T> docIds,
00043 int numParts = 2, int maxIters = 100);
00044
00046 vector<Cluster*> *kMeans(Cluster *cluster, int numParts = 2,
00047 int maxIters = 100);
00048
00051 vector<Cluster*> *bisecting_kMeans(vector<lemur::api::DOCID_T> docIds,
00052 int numParts = 2,
00053 int numIters = 5, int maxIters = 100);
00054
00055 private:
00057 const SimilarityMethod *sim;
00059 ClusterFactory *factory;
00061 const lemur::api::Index &index;
00063 bool compareClusterSets(Cluster **, Cluster **, int n);
00065 vector <lemur::api::DOCID_T> selectSeeds(vector<lemur::api::DOCID_T> docIds, int num);
00067 Cluster *chooseSplit(vector<Cluster *> *working);
00069 double scoreSet(vector<Cluster *> *working);
00070 };
00071 }
00072 }
00073
00074 #endif