00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 #ifndef INDRI_STOPPERTRANSFORMATION_HPP
00020 #define INDRI_STOPPERTRANSFORMATION_HPP
00021 
00022 #include "indri/Transformation.hpp"
00023 #include <string>
00024 #include <vector>
00025 #include "indri/Parameters.hpp"
00026 
00027 #ifdef WIN32
00028 #include <hash_set>
00029 #else
00030 
00031 #ifndef HAVE_GCC_VERSION
00032 #define HAVE_GCC_VERSION(MAJOR, MINOR) \
00033   (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR)))
00034 #endif 
00035 #if HAVE_GCC_VERSION(4,3)
00036 
00037 #include <tr1/unordered_set>
00038 #else
00039 #include <ext/hash_set>
00040 #endif
00041 
00042 using namespace __gnu_cxx;
00043 #endif
00044 
00045 namespace indri
00046 {
00047   namespace parse
00048   {
00049     
00050     class StopperTransformation : public Transformation {
00051     private:
00052       ObjectHandler<indri::api::ParsedDocument>* _handler;
00053 #ifdef WIN32
00054       struct ltstr {
00055         bool operator()( const char* s1,  const char* s2) const {
00056           return (strcmp(s1, s2) < 0);
00057         }
00058       };
00059       
00060       
00061       typedef stdext::hash_set< const char *, stdext::hash_compare< const char *, ltstr> > dictTable;
00062 #else
00063       struct eqstr {
00064         bool operator()(char* s1, char* s2) const {
00065           return strcmp(s1, s2) == 0;
00066         }
00067       };
00068 #if HAVE_GCC_VERSION(4,3)
00069       typedef std::tr1::unordered_set<char *, std::tr1::hash<std::string>, eqstr> dictTable;
00070 #else
00071       typedef hash_set<char *, hash<char *>, eqstr> dictTable;
00072 #endif
00073 #endif
00074 
00075       dictTable _table;
00076 
00077     public:
00078       StopperTransformation();
00079       StopperTransformation( const std::vector<std::string>& stopwords );
00080       StopperTransformation( const std::vector<const char*>& stopwords );
00081       StopperTransformation( const std::vector<char*>& stopwords );
00082       StopperTransformation( indri::api::Parameters& stopwords );
00083       ~StopperTransformation();
00084 
00085       void read( const std::vector<std::string>& stopwords );
00086       void read( const std::vector<const char*>& stopwords );
00087       void read( const std::vector<char*>& stopwords );
00088       void read( const std::string& filename );
00089       void read( indri::api::Parameters& stopwords );
00090 
00091       indri::api::ParsedDocument* transform( indri::api::ParsedDocument* document );
00092 
00093       void handle( indri::api::ParsedDocument* document );
00094       void setHandler( ObjectHandler<indri::api::ParsedDocument>& handler );
00095     };
00096   }
00097 }
00098 
00099 #endif // INDRI_STOPPERTRANSFORMATION_HPP
00100