#include <KrovetzStemmer.hpp>
Public Member Functions | |
| KrovetzStemmer () | |
| ~KrovetzStemmer () | |
| char * | kstem_stemmer (char *term) |
| Stem a term using the Krovetz algorithm. The returned stem may be longer than the input term. May return a pointer to the private attribute stem. Performs case normalization on its input argument. Return values should be copied before calling the method again. | |
| int | kstem_stem_tobuffer (char *term, char *buffer) |
| Stem a term using the Krovetz algorithm into the specified buffer. The returned stem may be longer than the input term. Performs case normalization on its input argument. | |
| void | kstem_add_table_entry (const char *variant, const char *word, bool exc=false) |
| Add an entry to the stemmer's dictionary table. | |
Static Public Attributes | |
| const int | MAX_WORD_LENGTH = 25 |
| Maximum number of characters in a word to be stemmed. | |
Private Types | |
| typedef indri::parse::KrovetzStemmer::dictEntry | dictEntry |
| Dictionary table entry. | |
| typedef indri::parse::KrovetzStemmer::cacheEntry | cacheEntry |
| Two-term hash table entry for caching across calls. | |
| typedef hash_map< const char *, dictEntry, hash< const char * >, eqstr > | dictTable |
Private Member Functions | |
| bool | ends (const char *s, int sufflen) |
| void | setsuff (const char *str, int length) |
| dictEntry * | getdep (char *word) |
| bool | lookup (char *word) |
| bool | cons (int i) |
| bool | vowelinstem () |
| bool | vowel (int i) |
| bool | doublec (int i) |
| void | plural () |
| void | past_tense () |
| void | aspect () |
| void | ion_endings () |
| void | er_and_or_endings () |
| void | ly_endings () |
| void | al_endings () |
| void | ive_endings () |
| void | ize_endings () |
| void | ment_endings () |
| void | ity_endings () |
| void | ble_endings () |
| void | ness_endings () |
| void | ism_endings () |
| void | ic_endings () |
| void | ncy_endings () |
| void | nce_endings () |
| void | loadTables () |
Private Attributes | |
| indri::thread::Mutex | _stemLock |
| Lock for protecting stem calls. | |
| dictTable | dictEntries |
| cacheEntry * | stemCache |
| int | stemhtsize |
| int | k |
| int | j |
| char * | word |
| char | stem [MAX_WORD_LENGTH] |
|
|
Two-term hash table entry for caching across calls.
|
|
|
Dictionary table entry.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
||||||||||||||||
|
Add an entry to the stemmer's dictionary table.
|
|
||||||||||||
|
Stem a term using the Krovetz algorithm into the specified buffer. The returned stem may be longer than the input term. Performs case normalization on its input argument.
|
|
|
Stem a term using the Krovetz algorithm. The returned stem may be longer than the input term. May return a pointer to the private attribute stem. Performs case normalization on its input argument. Return values should be copied before calling the method again.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
|
|
|
|
Lock for protecting stem calls.
|
|
|
|
|
|
|
|
|
|
|
|
Maximum number of characters in a word to be stemmed.
|
|
|
|
|
|
|
|
|
|
|
|
|
1.3.4