|
Functions |
std::string | getFinalHarvestPath (const std::string &corpusPath, const std::string &filePath, const std::string &harvestPath) |
lemur::file::Keyfile * | createRedirectKeyfile (const std::string &redirectKeyfilePath, const std::string &redirectPath) |
void | harvest_anchor_text_file (const std::string &path, const std::string &linkFilePath, const std::string &docOrderPath, lemur::file::Keyfile *redirectKeyfile, indri::parse::HTMLParser &parser, indri::parse::Tokenizer *tokenizer, lemur::file::Keyfile *keyfile, const std::string &fileClass) |
void | harvest_anchor_text (const std::string &corpusPath, const std::string &fileClass, const std::string &harvestPath, const std::string &docUrlNoKeyfilePath, const std::string &preSortPath, const std::string &redirectPath) |
void | collect_harvest_paths (const std::string &corpusPath, const std::string &fileClass, const std::string &harvestPath, const std::string &docUrlNoKeyfilePath, const std::string &preSortPath, const std::string &redirectPath) |
void | combineOutputFile (const std::string &corpusFile, const std::string &sortedPath, const std::string &outputSortedLinkFile, const std::string &docOrderPath, lemur::file::Keyfile *urlKeyfile, FILE *sortedDestFile, lemur::file::Keyfile *docNoKeyfile) |
void | combineSortedFiles (const std::string &corpusPath, const std::string &harvestPath, const std::string &outputSortedLinkFile, const std::string &preSortPath, const std::string &sortedPath, lemur::file::Keyfile *docNoKeyfile) |
void | usage () |
| Prints usage information.
|
int | main (int argc, char *argv[]) |
Variables |
std::vector< std::string > | harvestedLinkPaths |
indri::utility::IndriTimer | g_timer |
lemur::utility::SHA1 | SHA1Hasher |
char | _outputBuffer [5 *1024 *1024] |