#include <PDict.hpp>
Public Member Functions | |
| PDict () | |
| default constructor | |
| ~PDict () | |
| clean up | |
| DictEntryVector * | getTranslations (const string &term, DictEntryFilter *filter=NULL) const |
| Get dictionary entries (translations) for a term. | |
| int | numTranslations (const string &term, DictEntryFilter *filter=NULL) const |
| Get the number of dictionary entries (translations) for a term. | |
| int | getNumPairs () const |
| Get the total size of the dictionary. | |
| int | getSourceCount () const |
| Get the number of unique terms in the source vocabulary. | |
| int | getTargetCount () const |
| Get the number of unique terms in the target vocabulary. | |
| const string & | getName () const |
| Get the name of the dictionary. | |
| bool | isUsingCounts () const |
| Is the dictionary using counts or probabilities? | |
| void | setUsingCounts (bool val) |
| Set the flag for using counts or probabilities. | |
| void | add (const string &source, DictEntry &value, double(*compose)(double, double)=NULL) |
| Add an entry for a term. | |
| void | remove (const string &source, DictEntry &value) |
| Remove an entry for a term. | |
| void | remove (const string &source) |
| Remove all entries for a term. | |
| void | write (const string &outputName, const string &delim) |
| Output dictionary as plain text, separator delimited values. | |
| bool | read (const string &dictName, const string &delim, bool counts=false) |
| Input a dictionary from plain text, separator delimited values. The input file must contain 4 columns. The columns are: sourceterm;type;targetterm;probability; where type is an arbitrary symbol, such as a part of speech tag. | |
| bool | open (const string &dictName) |
| Open an existing probabilistic dictionary. | |
| bool | create (const string &dictName) |
| Create a new, empty probabilistic dictionary. | |
| void | close () |
| Close the dictionary. Flushes all buffers and closes all files. | |
| void | normalize () |
| Normalize probabilities of entries to sum to one. Normalizes all entries, updating the dictionary. | |
| void | startIteration () |
| Initialize for iteration over all keys. | |
| DictEntryVector * | nextTranslations (string &term, DictEntryFilter *filter=NULL) const |
| Get next key's dictionary entry (translations). | |
Private Member Functions | |
| void | writeTOC () const |
| write toc file | |
| bool | contains (const string &term, lemur::file::Keyfile &keyfile) const |
| known term? | |
| void | flush () |
| flush the current entry to table | |
Private Attributes | |
| dictStats | stats |
| dictionary statistics | |
| DictEntryVector * | currentVec |
| Current term's entry vector. | |
| bool | usingCounts |
| are we storing frequencies or probabilities? | |
| string | currentTerm |
| Current term. | |
| string | name |
| base name for dictionary | |
| lemur::file::Keyfile | dict |
| btree for dictionary entry records. | |
| lemur::file::Keyfile | targetIDs |
| target vocab termName -> freq table (delete if f == 0). | |
| lemur::file::File | dictEntries |
| File for entry data. | |
|
|
default constructor
|
|
|
clean up
|
|
||||||||||||||||
|
Add an entry for a term.
|
|
|
Close the dictionary. Flushes all buffers and closes all files.
|
|
||||||||||||
|
known term?
|
|
|
Create a new, empty probabilistic dictionary.
|
|
|
flush the current entry to table
|
|
|
Get the name of the dictionary.
|
|
|
Get the total size of the dictionary.
|
|
|
Get the number of unique terms in the source vocabulary.
|
|
|
Get the number of unique terms in the target vocabulary.
|
|
||||||||||||
|
Get dictionary entries (translations) for a term.
|
|
|
Is the dictionary using counts or probabilities?
|
|
||||||||||||
|
Get next key's dictionary entry (translations).
|
|
|
Normalize probabilities of entries to sum to one. Normalizes all entries, updating the dictionary.
|
|
||||||||||||
|
Get the number of dictionary entries (translations) for a term.
|
|
|
Open an existing probabilistic dictionary.
|
|
||||||||||||||||
|
Input a dictionary from plain text, separator delimited values. The input file must contain 4 columns. The columns are: sourceterm;type;targetterm;probability; where type is an arbitrary symbol, such as a part of speech tag.
|
|
|
Remove all entries for a term.
|
|
||||||||||||
|
Remove an entry for a term.
|
|
|
Set the flag for using counts or probabilities.
|
|
|
Initialize for iteration over all keys.
|
|
||||||||||||
|
Output dictionary as plain text, separator delimited values.
|
|
|
write toc file
|
|
|
Current term.
|
|
|
Current term's entry vector.
|
|
|
btree for dictionary entry records.
|
|
|
File for entry data.
|
|
|
base name for dictionary
|
|
|
dictionary statistics
|
|
|
target vocab termName -> freq table (delete if f == 0).
|
|
|
are we storing frequencies or probabilities?
|
1.3.4