Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ClusterDB.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 // David Fisher, Peter Amstutz
00013 // init: 11/18/2002
00014 #ifndef _CLUSTERDB_HPP
00015 #define _CLUSTERDB_HPP
00016 #include <stdexcept>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "IndexedReal.hpp"
00020 #include "ClusterParam.hpp"
00021 #include "ClusterFactory.hpp"
00022 #include "SimFactory.hpp"
00023 #include "ThresholdFcn.hpp"
00024 namespace lemur
00025 {
00027   namespace api
00028   {
00029     
00031     class ClusterDBError : public runtime_error
00032     {
00033     public:
00034       ClusterDBError(const string& s) : runtime_error(s) { };
00035     };
00036 
00038 
00039     class ClusterDB
00040     {
00041     public:
00043       ClusterDB(const Index *ind, 
00044                 double threshold = 0.25,
00045                 enum ClusterParam::simTypes simType = ClusterParam::COS,
00046                 enum ClusterParam::clusterTypes clusterType=ClusterParam::CENTROID,
00047                 enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00048 
00050       virtual ~ClusterDB();
00051 
00053       virtual int countClusters() const = 0;
00054 
00056       virtual int maxID() const = 0;
00057 
00059       virtual vector<lemur::cluster::Cluster*> getDocCluster(lemur::api::DOCID_T docId) const = 0;
00060 
00062       virtual lemur::cluster::Cluster *getCluster(int clusterId) const = 0;
00063 
00065       virtual vector<int> getDocClusterId(lemur::api::DOCID_T docId) const = 0;
00066 
00068       virtual int addToCluster(lemur::api::DOCID_T docId, int clusterId, double score) = 0;
00069 
00071       virtual int addToCluster(lemur::api::DOCID_T docId, lemur::cluster::Cluster *cluster, double score) = 0;
00072 
00074       virtual int removeFromCluster(lemur::api::DOCID_T docId, int clusterID) = 0;
00076       virtual int deleteCluster(int clusterID) = 0;
00077 
00079       virtual int deleteCluster (lemur::cluster::Cluster *target) = 0;
00080 
00082       int addCluster(lemur::cluster::Cluster *oldCluster);
00083 
00085       virtual vector<int> splitCluster(int cid, int num = 2) = 0;
00086 
00088       virtual int mergeClusters(int cid1, int cid2) = 0;
00089 
00091       virtual void printClusters() const;
00092 
00094       virtual int cluster(lemur::api::DOCID_T docId);
00095 
00097       virtual int cluster(lemur::api::DOCID_T docId, double &finalScore);
00098 
00100       virtual string getKeyWords(int cid, int numTerms = 10) const;
00101 
00102     protected:
00104       const Index *index;
00106       int numDocs;
00108       int numTerms;
00110       double threshold; // fix this.
00112       mutable vector<lemur::cluster::Cluster *> clusters;
00114       const lemur::cluster::SimilarityMethod *sim;
00116       lemur::cluster::ClusterFactory *factory;
00118       virtual lemur::cluster::Cluster * newCluster() = 0;
00120       lemur::cluster::Cluster* allocateCluster(int clusterID) const;
00122       lemur::cluster::ThresholdFcn *thresh;
00123     };
00124   }
00125 }
00126 
00127 #endif

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4