#include <Arabic_Stemmer.hpp>
Public Member Functions | |
Arabic_Stemmer (std::string stemFunc) | |
~Arabic_Stemmer () | |
void | stemTerm (char *, char *) |
Private Member Functions | |
void | arabic_remove_diacritics (char *, char *) |
void | arabic_stop (char *, char *) |
void | no_stem (char *, char *) |
void | arabic_norm2 (char *, char *) |
void | arabic_norm2_stop (char *, char *) |
void | arabic_light10 (char *, char *) |
void | arabic_light10_stop (char *, char *) |
bool | on_stop_list (char *word) |
int | is_whitespace (const char c) |
void | remove_definite_articles (char *word, char *result) |
void | remove_all_suffixes (char *word, char *result, size_t lenlimit) |
Private Attributes | |
void(Arabic_Stemmer::* | stem_fct )(char *, char *) |
std::set< const char *, ltstr > | stop_words_ht |
Static Private Attributes | |
stem_info_t | stemtable [] |
const int | ArabicVowel [256] |
const int | Norm3Char [256] |
const int | NormChar [256] |
const int | isWhitespace [256] |
const char * | stopwords [] |
const char * | suffixes [] |
const char * | defarticles [] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const int Arabic_Stemmer::ArabicVowel[256] [static, private] |
Initial value: { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0xe6,0,0,0,0,0,0xec,0xed,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
const char * Arabic_Stemmer::defarticles[] [static, private] |
Initial value: {"ال", "وال","بال", "كال", "فال", "لل", "\0"} |
const int Arabic_Stemmer::isWhitespace[256] [static, private] |
Initial value: { 0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
const int Arabic_Stemmer::Norm3Char[256] [static, private] |
Initial value: { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f, 0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f, 0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xc0,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf, 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf, 0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
const int Arabic_Stemmer::NormChar[256] [static, private] |
Initial value: { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f, 0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f, 0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xc0,0xc1,0xc7,0xc7,0xc4,0xc7,0xc6,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf, 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf, 0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
void(Arabic_Stemmer::* Arabic_Stemmer::stem_fct)(char *, char *) [private] |
|
stem_info_t Arabic_Stemmer::stemtable[] [static, private] |
Initial value: { {"none", &Arabic_Stemmer::no_stem}, {"arabic_stop", &Arabic_Stemmer::arabic_stop}, {"arabic_norm2", &Arabic_Stemmer::arabic_norm2}, {"arabic_norm2_stop", &Arabic_Stemmer::arabic_norm2_stop}, {"arabic_light10", &Arabic_Stemmer::arabic_light10}, {"arabic_light10_stop", &Arabic_Stemmer::arabic_light10_stop} } |
std::set< const char *, ltstr > Arabic_Stemmer::stop_words_ht [private] |
|
const char * Arabic_Stemmer::stopwords[] [static, private] |
Initial value: { "ان","بعد", "ضد", "يلي", "الى", "في", "من", "حتى", "وهو", "يكون", "به", "وليس", "أحد", "على", "وكان", "تلك", "كذلك", "التي", "وبين", "فيها", "عليها", "إن", "وعلى", "لكن", "عن", "مساء", "ليس", "منذ", "الذي", "أما", "حين", "ومن", "لا", "ليسب", "وكانت", "أي", "ما", "عنه", "حول", "دون", "مع", "لكنه", "ولكن", "له", "هذا", "والتي","فقط", "ثم", "هذه", "أنه", "تكون", "قد", "بين", "جدا", "لن", "نحو", "كان", "لهم", "لأن", "اليوم", "لم", "هؤلاء", "فإن", "فيه", "ذلك", "لو", "عند", "اللذين", "كل", "بد", "لدى", "وثي", "أن", "ومع", "فقد", "بل", "هو", "عنها", "منه", "بها", "وفي", "فهو", "تحت", "لها", "أو", "إذ", "علي", "عليه", "كما", "كيف", "هنا", "وقد", "كانت", "لذلك", "أمام", "هناك", "قبل", "معه", "يوم", "منها", "إلى", "إذا", "هل", "حيث", "هي", "اذا", "او", "و", "ما", "لا", "الي", "إلي", "مازال", "لازال", "لايزال", "مايزال", "اصبح", "أصبح", "أمسى", "امسى", "أضحى", "اضحى", "ظل", "مابرح", "مافتئ", "ماانفك", "بات", "صار", "ليس", "إن", "كأن", "ليت", "لعل", "لاسيما", "ولايزال", "الحالي", "ضمن", "اول", "وله", "ذات", "اي", "بدلا", "اليها", "انه", "الذين", "فانه", "وان", "والذي", "وهذا", "لهذا", "الا", "فكان", "ستكون", "مما", "أبو", "بإن", "الذي", "اليه", "يمكن", "بهذا", "لدي", "وأن", "وهي", "وأبو", "آل", "الذي", "هن", "الذى", NULL } |
const char * Arabic_Stemmer::suffixes[] [static, private] |
Initial value: {"ها","ان","ات","ون","ين","يه","ية", "ه","ة","ي","\0"} |