|
Public Member Functions |
| | PLSA (const lemur::api::Index &dbIndex, int numCats, lemur::utility::HashFreqVector **train, lemur::utility::HashFreqVector **validate, int numIter, int numRestarts, double betastart, double betastop, double anneal, double betaMod) |
| | building with provided train/test partitions
|
| | PLSA (const lemur::api::Index &dbIndex, int testPercentage, int numCats, int numIter, int numRestarts, double betastart, double betastop, double anneal, double betaMod) |
| | building without provided train/test partitions
|
| | PLSA (const lemur::api::Index &dbIndex) |
| | for using prebuilt tables.
|
| virtual | ~PLSA () |
| void | iterateWithRestarts () |
| | Start things going.
|
| double * | get_p_z () const |
| double ** | get_p_w_z () const |
| | P(w|z) matrix.
|
| double ** | get_p_d_z () const |
| | P(d|z) matrix.
|
| double | getProb (int d, int w) const |
| | P(d,w).
|
| int | numWords () const |
| | number of terms
|
| int | numDocs () const |
| | number of docs
|
| int | numCats () const |
| | number of categories
|
| bool | readArrays () |
Private Types |
| enum | pType { P_Z = 0,
P_W_Z = 1,
P_D_Z = 2
} |
| | read/write array options. More...
|
Private Member Functions |
| void | setPrevToCurrent () |
| | Copy current iteration data to previous iteration data.
|
| void | setCurrentToBest () |
| | Copy best iteration data to current iteration data.
|
| void | setBestToCurrent () |
| | Copy current iteration data to best iteration data.
|
| void | setBestToPrev () |
| | Copy previous iteration data to best iteration data.
|
| void | setPrevToBest () |
| | Copy best iteration data to previous iteration data.
|
| double | getAverageLikelihood () |
| double | getAverageLikelihoodPrev () |
| double | jointEstimate (int indexD, int indexW) |
| | Estimates P(d,w) using previous parameter estimates.
|
| double | jointEstimateCurrent (int indexD, int indexW) |
| | Estimates P(d,w) using current parameter estimates.
|
| double | jointEstimateBest (int indexD, int indexW) |
| | Estimates P(d,w) using best parameter estimates.
|
| double | jointEstimateBeta (int indexD, int indexW) |
| void | iterate () |
| | main routine for model training.
|
| void | initializeParameters () |
| | Initialize the prev probability arrays to random values.
|
| double | doLogLikelihood (jointfuncType, lemur::utility::HashFreqVector **&myData) |
| double | logLikelihood () |
| | Calculate the training data log-likelihood using prev parameters.
|
| double | validateDataLogLikelihood () |
| | Calculate the hold out data log-likelihood using prev parameters.
|
| double | validateCurrentLogLikelihood () |
| | Calculate the hold out data log-likelihood using current parameters.
|
| double | bestDataLogLikelihood () |
| | Calculate the hold out data log-likelihood using the best parameters.
|
| double | interleavedIterationEM () |
| | performs one EM iteration, returns log likelihood of training data
|
| void | selectTestTrain (int testPercent) |
| | Select training/test events.
|
| void | init () |
| | Initialize attributes.
|
| void | initR () |
| | Initialize R and w->d inverted list.
|
| void | writeArrays () |
| | write out all the arrays to file.
|
| bool | readArray (ifstream &infile, enum pType which) |
| | Read a probability array (matrix) from a file.
|
| void | writeArray (ofstream &ofile, enum pType which) |
| | Write a probability array (matrix) to a file.
|
Private Attributes |
| const lemur::api::Index & | ind |
| | Index to use.
|
| int | sizeZ |
| | number of categories
|
| int | sizeD |
| | number of documents
|
| int | sizeW |
| | number of words
|
| lemur::utility::HashFreqVector ** | data |
| | train d->w freq list
|
| lemur::utility::HashFreqVector ** | testData |
| | test (validation) d->w freq list
|
| set< int, less< int > > * | invIndex |
| | w->d inverted index for M step of P(w | z)
|
| double | startBeta |
| | Starting value of beta for TEM (tempered EM).
|
| double | beta |
| | Current value of beta for TEM.
|
| double | betaMin |
| | Minimum value of beta for TEM.
|
| double | betaModifier |
| | eta for TEM: the multiplier applied to beta (beta = eta * beta)
|
| double | annealcue |
| | annealcue value (delta)
|
| int | R |
| | used in M step for p_z
|
| int | numberOfIterations |
| | How many iterations.
|
| int | numberOfRestarts |
| | How many restarts.
|
| double | bestTestLL |
| | Best log likelihood on the test data so far.
|
| double | bestA |
| | Best average log likelihood on the test data so far.
|
| bool | bestOnly |
| | Have we only loaded existing tables from files?
|
| bool | ownMem |
| | did we allocate the test/train vectors?
|
| double * | p_z_current |
| | P(z) vector current iteration.
|
| double ** | p_w_z_current |
| | P(w|z) matrix current iteration.
|
| double ** | p_d_z_current |
| | P(d|z) matrix current iteration.
|
| double * | p_z_prev |
| | P(z) vector previous iteration.
|
| double ** | p_w_z_prev |
| | P(w|z) matrix previous iteration.
|
| double ** | p_d_z_prev |
| | P(d|z) matrix previous iteration.
|
| double * | p_z_best |
| | P(z) vector best iteration.
|
| double ** | p_w_z_best |
| | P(w|z) matrix best iteration.
|
| double ** | p_d_z_best |
| | P(d|z) matrix best iteration.
|