Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

lemur::api::ClusterDB Class Reference

Abstract interface for clustering databases. More...

#include <ClusterDB.hpp>

Inheritance diagram for lemur::api::ClusterDB:

Inherited by lemur::cluster::FlatFileClusterDB and lemur::cluster::KeyfileClusterDB.

List of all members.

Public Member Functions

 ClusterDB (const Index *ind, double threshold=0.25, enum ClusterParam::simTypes simType=ClusterParam::COS, enum ClusterParam::clusterTypes clusterType=ClusterParam::CENTROID, enum ClusterParam::docModes docMode=ClusterParam::DMAX)
 initialize the cluster method

virtual ~ClusterDB ()
 clean up

virtual int countClusters () const =0
 Return number of clusters.

virtual int maxID () const =0
 Return highest cluster ID.

virtual vector< lemur::cluster::Cluster * > getDocCluster (lemur::api::DOCID_T docId) const =0
 Get the Cluster for the given docId.

virtual lemur::cluster::Cluster * getCluster (int clusterId) const =0
 Get the Cluster for the given clusterId.

virtual vector< int > getDocClusterId (lemur::api::DOCID_T docId) const =0
 Get the Cluster id for the given docId.

virtual int addToCluster (lemur::api::DOCID_T docId, int clusterId, double score)=0
 Add a document id to a cluster, given the cluster id.

virtual int addToCluster (lemur::api::DOCID_T docId, lemur::cluster::Cluster *cluster, double score)=0
 Add a document id to a cluster, given the cluster.

virtual int removeFromCluster (lemur::api::DOCID_T docId, int clusterID)=0
 Remove a document id from a cluster, given the cluster id.

virtual int deleteCluster (int clusterID)=0
 Delete a cluster entirely.

virtual int deleteCluster (lemur::cluster::Cluster *target)=0
 Delete a cluster entirely.

int addCluster (lemur::cluster::Cluster *oldCluster)
 Add a cluster to the DB (renumbers the cluster id). Returns new id.

virtual vector< int > splitCluster (int cid, int num=2)=0
 split cluster using Cluster::split

virtual int mergeClusters (int cid1, int cid2)=0
 merge two clusters.

virtual void printClusters () const
 pretty print all clusters to the standard output.

virtual int cluster (lemur::api::DOCID_T docId)
 Assign a document to a cluster.

virtual int cluster (lemur::api::DOCID_T docId, double &finalScore)
 Assign a document to a cluster, returning score.

virtual string getKeyWords (int cid, int numTerms=10) const
 Get the top N keywords for a cluster.


Protected Member Functions

virtual lemur::cluster::Cluster * newCluster ()=0
 subclass specific cluster initialization.

lemur::cluster::Cluster * allocateCluster (int clusterID) const
 Uses ClusterFactory to create Cluster objects.


Protected Attributes

const Index * index
 Database containing the collection to operate on.

int numDocs
 Number of documents in the database, reduces calls to db->docCount().

int numTerms
 Number of terms in the database.

double threshold
 threshold for YES/NO decisions

vector< lemur::cluster::Cluster * > clusters
 the cluster database.

const lemur::cluster::SimilarityMethod * sim
 Similarity method to use.

lemur::cluster::ClusterFactory * factory
 Cluster factory.

lemur::cluster::ThresholdFcn * thresh
 Threshold function for adaptive thresholding.


Detailed Description

Abstract interface for clustering databases.


Constructor & Destructor Documentation

lemur::api::ClusterDB::ClusterDB (const Index * ind,
double  threshold = 0.25,
enum ClusterParam::simTypes  simType = ClusterParam::COS,
enum ClusterParam::clusterTypes  clusterType = ClusterParam::CENTROID,
enum ClusterParam::docModes  docMode = ClusterParam::DMAX
)

initialize the cluster method

lemur::api::ClusterDB::~ClusterDB ()  [virtual]
 

clean up


Member Function Documentation

int lemur::api::ClusterDB::addCluster (lemur::cluster::Cluster * oldCluster)
 

Add a cluster to the DB (renumbers the cluster id). Returns new id.

virtual int lemur::api::ClusterDB::addToCluster (lemur::api::DOCID_T  docId,
lemur::cluster::Cluster * cluster,
double  score
)  [pure virtual]
 

Add a document id to a cluster, given the cluster.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual int lemur::api::ClusterDB::addToCluster (lemur::api::DOCID_T  docId,
int  clusterId,
double  score
)  [pure virtual]
 

Add a document id to a cluster, given the cluster id.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

lemur::cluster::Cluster * lemur::api::ClusterDB::allocateCluster (int  clusterID)  const [protected]
 

Uses ClusterFactory to create Cluster objects.

int lemur::api::ClusterDB::cluster (lemur::api::DOCID_T  docId,
double &  finalScore
)  [virtual]
 

Assign a document to a cluster, returning score.

int lemur::api::ClusterDB::cluster (lemur::api::DOCID_T  docId)  [virtual]
 

Assign a document to a cluster.

virtual int lemur::api::ClusterDB::countClusters ()  const [pure virtual]
 

Return number of clusters.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual int lemur::api::ClusterDB::deleteCluster (lemur::cluster::Cluster * target)  [pure virtual]
 

Delete a cluster entirely.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual int lemur::api::ClusterDB::deleteCluster (int  clusterID)  [pure virtual]
 

Delete a cluster entirely.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual lemur::cluster::Cluster* lemur::api::ClusterDB::getCluster (int  clusterId)  const [pure virtual]
 

Get the Cluster for the given clusterId.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual vector<lemur::cluster::Cluster*> lemur::api::ClusterDB::getDocCluster (lemur::api::DOCID_T  docId)  const [pure virtual]
 

Get the Cluster for the given docId.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual vector<int> lemur::api::ClusterDB::getDocClusterId (lemur::api::DOCID_T  docId)  const [pure virtual]
 

Get the Cluster id for the given docId.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

string lemur::api::ClusterDB::getKeyWords (int  cid,
int  numTerms = 10
)  const [virtual]
 

Get the top N keywords for a cluster.

virtual int lemur::api::ClusterDB::maxID ()  const [pure virtual]
 

Return highest cluster ID.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual int lemur::api::ClusterDB::mergeClusters (int  cid1,
int  cid2
)  [pure virtual]
 

merge two clusters.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual lemur::cluster::Cluster* lemur::api::ClusterDB::newCluster ()  [protected, pure virtual]
 

subclass specific cluster initialization.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

void lemur::api::ClusterDB::printClusters ()  const [virtual]
 

pretty print all clusters to the standard output.

virtual int lemur::api::ClusterDB::removeFromCluster (lemur::api::DOCID_T  docId,
int  clusterID
)  [pure virtual]
 

Remove a document id from a cluster, given the cluster id.

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.

virtual vector<int> lemur::api::ClusterDB::splitCluster (int  cid,
int  num = 2
)  [pure virtual]
 

split cluster using Cluster::split

Implemented in lemur::cluster::FlatFileClusterDB, and lemur::cluster::KeyfileClusterDB.


Member Data Documentation

vector<lemur::cluster::Cluster *> lemur::api::ClusterDB::clusters [mutable, protected]
 

the cluster database.

lemur::cluster::ClusterFactory* lemur::api::ClusterDB::factory [protected]
 

Cluster factory.

const Index* lemur::api::ClusterDB::index [protected]
 

Database containing the collection to operate on.

int lemur::api::ClusterDB::numDocs [protected]
 

Number of documents in the database, reduces calls to db->docCount().

int lemur::api::ClusterDB::numTerms [protected]
 

Number of terms in the database.

const lemur::cluster::SimilarityMethod* lemur::api::ClusterDB::sim [protected]
 

Similarity method to use.

lemur::cluster::ThresholdFcn* lemur::api::ClusterDB::thresh [protected]
 

Threshold function for adaptive thresholding.

double lemur::api::ClusterDB::threshold [protected]
 

threshold for YES/NO decisions


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:04 2010 for Lemur by doxygen 1.3.4