00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef INDRI_STOPPERTRANSFORMATION_HPP
00020 #define INDRI_STOPPERTRANSFORMATION_HPP
00021
#include "indri/Transformation.hpp"
#include <cstring>
#include <string>
#include <vector>
#include "indri/Parameters.hpp"
00026
00027 #ifdef WIN32
00028 #include <hash_set>
00029 #else
00030
00031 #ifndef HAVE_GCC_VERSION
00032 #define HAVE_GCC_VERSION(MAJOR, MINOR) \
00033 (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR)))
00034 #endif
00035 #if HAVE_GCC_VERSION(4,3)
00036
00037 #include <tr1/unordered_set>
00038 #else
00039 #include <ext/hash_set>
00040 #endif
00041
00042 using namespace __gnu_cxx;
00043 #endif
00044
00045 namespace indri
00046 {
00047 namespace parse
00048 {
00049
00050 class StopperTransformation : public Transformation {
00051 private:
00052 ObjectHandler<indri::api::ParsedDocument>* _handler;
00053 #ifdef WIN32
00054 struct ltstr {
00055 bool operator()( const char* s1, const char* s2) const {
00056 return (strcmp(s1, s2) < 0);
00057 }
00058 };
00059
00060
00061 typedef stdext::hash_set< const char *, stdext::hash_compare< const char *, ltstr> > dictTable;
00062 #else
00063 struct eqstr {
00064 bool operator()(char* s1, char* s2) const {
00065 return strcmp(s1, s2) == 0;
00066 }
00067 };
00068 #if HAVE_GCC_VERSION(4,3)
00069 typedef std::tr1::unordered_set<char *, std::tr1::hash<std::string>, eqstr> dictTable;
00070 #else
00071 typedef hash_set<char *, hash<char *>, eqstr> dictTable;
00072 #endif
00073 #endif
00074
00075 dictTable _table;
00076
00077 public:
00078 StopperTransformation();
00079 StopperTransformation( const std::vector<std::string>& stopwords );
00080 StopperTransformation( const std::vector<const char*>& stopwords );
00081 StopperTransformation( const std::vector<char*>& stopwords );
00082 StopperTransformation( indri::api::Parameters& stopwords );
00083 ~StopperTransformation();
00084
00085 void read( const std::vector<std::string>& stopwords );
00086 void read( const std::vector<const char*>& stopwords );
00087 void read( const std::vector<char*>& stopwords );
00088 void read( const std::string& filename );
00089 void read( indri::api::Parameters& stopwords );
00090
00091 indri::api::ParsedDocument* transform( indri::api::ParsedDocument* document );
00092
00093 void handle( indri::api::ParsedDocument* document );
00094 void setHandler( ObjectHandler<indri::api::ParsedDocument>& handler );
00095 };
00096 }
00097 }
00098
00099 #endif // INDRI_STOPPERTRANSFORMATION_HPP
00100