#include <Arabic_Stemmer.hpp>
Public Member Functions | |
| Arabic_Stemmer (std::string stemFunc) | |
| ~Arabic_Stemmer () | |
| void | stemTerm (char *, char *) |
Private Member Functions | |
| void | arabic_remove_diacritics (char *, char *) |
| void | arabic_stop (char *, char *) |
| void | no_stem (char *, char *) |
| void | arabic_norm2 (char *, char *) |
| void | arabic_norm2_stop (char *, char *) |
| void | arabic_light10 (char *, char *) |
| void | arabic_light10_stop (char *, char *) |
| bool | on_stop_list (char *word) |
| int | is_whitespace (const char c) |
| void | remove_definite_articles (char *word, char *result) |
| void | remove_all_suffixes (char *word, char *result, size_t lenlimit) |
Private Attributes | |
| void(Arabic_Stemmer::* | stem_fct )(char *, char *) |
| std::set< const char *, ltstr > | stop_words_ht |
Static Private Attributes | |
| stem_info_t | stemtable [] |
| const int | ArabicVowel [256] |
| const int | Norm3Char [256] |
| const int | NormChar [256] |
| const int | isWhitespace [256] |
| const char * | stopwords [] |
| const char * | suffixes [] |
| const char * | defarticles [] |
|
|
|
|
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
||||||||||||||||
|
|
|
||||||||||||
|
|
|
||||||||||||
|
|
|
const int Arabic_Stemmer::ArabicVowel[256] [static, private]
Initial value: {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7, 0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xe6,0,0,0,0,0,0xec,0xed,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
|
const char * Arabic_Stemmer::defarticles[] [static, private]
Initial value: {"ال", "وال","بال", "كال",
"فال", "لل", "\0"}
|
|
const int Arabic_Stemmer::isWhitespace[256] [static, private]
Initial value: {
0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
|
const int Arabic_Stemmer::Norm3Char[256] [static, private]
Initial value: {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc0,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf,
0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
|
const int Arabic_Stemmer::NormChar[256] [static, private]
Initial value: {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc0,0xc1,0xc7,0xc7,0xc4,0xc7,0xc6,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf,
0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
|
|
|
|
stem_info_t Arabic_Stemmer::stemtable[] [static, private]
Initial value: {
{"none", &Arabic_Stemmer::no_stem},
{"arabic_stop", &Arabic_Stemmer::arabic_stop},
{"arabic_norm2", &Arabic_Stemmer::arabic_norm2},
{"arabic_norm2_stop", &Arabic_Stemmer::arabic_norm2_stop},
{"arabic_light10", &Arabic_Stemmer::arabic_light10},
{"arabic_light10_stop", &Arabic_Stemmer::arabic_light10_stop}
}
|
|
|
|
|
const char * Arabic_Stemmer::stopwords[] [static, private]
Initial value:
{ "ان","بعد", "ضد", "يلي", "الى", "في", "من", "حتى", "وهو", "يكون",
"به", "وليس", "أحد", "على", "وكان", "تلك", "كذلك", "التي", "وبين",
"فيها", "عليها", "إن", "وعلى", "لكن", "عن", "مساء", "ليس", "منذ",
"الذي", "أما", "حين", "ومن", "لا", "ليسب", "وكانت", "أي", "ما", "عنه",
"حول", "دون", "مع", "لكنه", "ولكن", "له", "هذا", "والتي","فقط", "ثم",
"هذه", "أنه", "تكون", "قد", "بين", "جدا", "لن", "نحو", "كان", "لهم",
"لأن", "اليوم", "لم", "هؤلاء", "فإن", "فيه", "ذلك", "لو", "عند",
"اللذين", "كل", "بد", "لدى", "وثي", "أن", "ومع", "فقد", "بل", "هو",
"عنها", "منه", "بها", "وفي", "فهو", "تحت", "لها", "أو", "إذ", "علي",
"عليه", "كما", "كيف", "هنا", "وقد", "كانت", "لذلك", "أمام", "هناك",
"قبل", "معه", "يوم", "منها", "إلى", "إذا", "هل", "حيث", "هي", "اذا",
"او", "و", "ما", "لا", "الي", "إلي", "مازال", "لازال", "لايزال",
"مايزال", "اصبح", "أصبح", "أمسى", "امسى", "أضحى", "اضحى", "ظل",
"مابرح", "مافتئ", "ماانفك", "بات", "صار", "ليس", "إن", "كأن",
"ليت", "لعل", "لاسيما", "ولايزال", "الحالي", "ضمن", "اول", "وله",
"ذات", "اي", "بدلا", "اليها", "انه", "الذين", "فانه", "وان",
"والذي", "وهذا", "لهذا", "الا", "فكان", "ستكون", "مما", "أبو",
"بإن", "الذي", "اليه", "يمكن", "بهذا", "لدي", "وأن", "وهي", "وأبو",
"آل", "الذي", "هن", "الذى", NULL }
|
|
const char * Arabic_Stemmer::suffixes[] [static, private]
Initial value: {"ها","ان","ات","ون","ين","يه","ية",
"ه","ة","ي","\0"}
|
1.3.4