#include <KrovetzStemmer.hpp>
Public Member Functions | |
KrovetzStemmer () | |
~KrovetzStemmer () | |
char * | kstem_stemmer (char *term) |
Stem a term using the Krovetz algorithm. The stem returned may be longer than the input term. May return a pointer to the private attribute stem. Performs case normalization on its input argument. Return values should be copied before calling the method again. | |
int | kstem_stem_tobuffer (char *term, char *buffer) |
Stem a term using the Krovetz algorithm into the specified buffer. The stem returned may be longer than the input term. Performs case normalization on its input argument. | |
void | kstem_add_table_entry (const char *variant, const char *word, bool exc=false) |
Add an entry to the stemmer's dictionary table. | |
Static Public Attributes | |
const int | MAX_WORD_LENGTH = 25 |
Maximum number of characters in a word to be stemmed. | |
Private Types | |
typedef indri::parse::KrovetzStemmer::dictEntry | dictEntry |
Dictionary table entry. | |
typedef indri::parse::KrovetzStemmer::cacheEntry | cacheEntry |
Two-term hash table entry for caching across calls. | |
typedef hash_map< const char *, dictEntry, hash< const char * >, eqstr > | dictTable |
Private Member Functions | |
bool | ends (const char *s, int sufflen) |
void | setsuff (const char *str, int length) |
dictEntry * | getdep (char *word) |
bool | lookup (char *word) |
bool | cons (int i) |
bool | vowelinstem () |
bool | vowel (int i) |
bool | doublec (int i) |
void | plural () |
void | past_tense () |
void | aspect () |
void | ion_endings () |
void | er_and_or_endings () |
void | ly_endings () |
void | al_endings () |
void | ive_endings () |
void | ize_endings () |
void | ment_endings () |
void | ity_endings () |
void | ble_endings () |
void | ness_endings () |
void | ism_endings () |
void | ic_endings () |
void | ncy_endings () |
void | nce_endings () |
void | loadTables () |
Private Attributes | |
indri::thread::Mutex | _stemLock |
Lock for protecting stem calls. | |
dictTable | dictEntries |
cacheEntry * | stemCache |
int | stemhtsize |
int | k |
int | j |
char * | word |
char | stem [MAX_WORD_LENGTH] |
|
Two-term hash table entry for caching across calls.
|
|
Dictionary table entry.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Add an entry to the stemmer's dictionary table.
|
|
Stem a term using the Krovetz algorithm into the specified buffer. The stem returned may be longer than the input term. Performs case normalization on its input argument.
|
|
Stem a term using the Krovetz algorithm. The stem returned may be longer than the input term. May return a pointer to the private attribute stem. Performs case normalization on its input argument. Return values should be copied before calling the method again.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Lock for protecting stem calls.
|
|
|
|
|
|
|
|
Maximum number of characters in a word to be stemmed.
|
|
|
|
|
|
|
|
|