#include <PDict.hpp>
Public Member Functions | |
PDict () | |
default constructor | |
~PDict () | |
clean up | |
DictEntryVector * | getTranslations (const string &term, DictEntryFilter *filter=NULL) const |
Get dictionary entries (translations) for a term. | |
int | numTranslations (const string &term, DictEntryFilter *filter=NULL) const |
Get the number of dictionary entries (translations) for a term. | |
int | getNumPairs () const |
Get the total size of the dictionary. | |
int | getSourceCount () const |
Get the number of unique terms in the source vocabulary. | |
int | getTargetCount () const |
Get the number of unique terms in the target vocabulary. | |
const string & | getName () const |
Get the name of the dictionary. | |
bool | isUsingCounts () const |
Is the dictionary using counts or probabilities? | |
void | setUsingCounts (bool val) |
Set the flag for using counts or probabilities. | |
void | add (const string &source, DictEntry &value, double(*compose)(double, double)=NULL) |
Add an entry for a term. | |
void | remove (const string &source, DictEntry &value) |
Remove an entry for a term. | |
void | remove (const string &source) |
Remove all entries for a term. | |
void | write (const string &outputName, const string &delim) |
Output dictionary as plain text, separator delimited values. | |
bool | read (const string &dictName, const string &delim, bool counts=false) |
Input a dictionary from plain text, separator delimited values. The input file must contain 4 columns. The columns are: sourceterm;type;targetterm;probability; where type is an arbitrary symbol, such as a part of speech tag. | |
bool | open (const string &dictName) |
Open an existing probabilistic dictionary. | |
bool | create (const string &dictName) |
Create a new, empty probabilistic dictionary. | |
void | close () |
Close the dictionary. Flushes all buffers and closes all files. | |
void | normalize () |
Normalize probabilities of entries to sum to one. Normalizes all entries, updating the dictionary. | |
void | startIteration () |
Initialize for iteration over all keys. | |
DictEntryVector * | nextTranslations (string &term, DictEntryFilter *filter=NULL) const |
Get next key's dictionary entry (translations). | |
Private Member Functions | |
void | writeTOC () const |
write toc file | |
bool | contains (const string &term, lemur::file::Keyfile &keyfile) const |
known term? | |
void | flush () |
flush the current entry to table | |
Private Attributes | |
dictStats | stats |
dictionary statistics | |
DictEntryVector * | currentVec |
Current term's entry vector. | |
bool | usingCounts |
are we storing frequencies or probabilities? | |
string | currentTerm |
Current term. | |
string | name |
base name for dictionary | |
lemur::file::Keyfile | dict |
btree for dictionary entry records. | |
lemur::file::Keyfile | targetIDs |
target vocab termName -> freq table (delete if f == 0). | |
lemur::file::File | dictEntries |
File for entry data. |
|
default constructor
|
|
clean up
|
|
Add an entry for a term.
|
|
Close the dictionary. Flushes all buffers and closes all files.
|
|
known term?
|
|
Create a new, empty probabilistic dictionary.
|
|
flush the current entry to table
|
|
Get the name of the dictionary.
|
|
Get the total size of the dictionary.
|
|
Get the number of unique terms in the source vocabulary.
|
|
Get the number of unique terms in the target vocabulary.
|
|
Get dictionary entries (translations) for a term.
|
|
Is the dictionary using counts or probabilities?
|
|
Get next key's dictionary entry (translations).
|
|
Normalize probabilities of entries to sum to one. Normalizes all entries, updating the dictionary.
|
|
Get the number of dictionary entries (translations) for a term.
|
|
Open an existing probabilistic dictionary.
|
|
Input a dictionary from plain text, separator delimited values. The input file must contain 4 columns. The columns are: sourceterm;type;targetterm;probability; where type is an arbitrary symbol, such as a part of speech tag.
|
|
Remove all entries for a term.
|
|
Remove an entry for a term.
|
|
Set the flag for using counts or probabilities.
|
|
Initialize for iteration over all keys.
|
|
Output dictionary as plain text, separator delimited values.
|
|
write toc file
|
|
Current term.
|
|
Current term's entry vector.
|
|
btree for dictionary entry records.
|
|
File for entry data.
|
|
base name for dictionary
|
|
dictionary statistics
|
|
target vocab termName -> freq table (delete if f == 0).
|
|
are we storing frequencies or probabilities?
|