|
Public Member Functions |
| Combiner (int bins=10) |
void | combineRedirectDestinationBuckets (const std::string &tmpPath) |
void | combineBuckets (const std::string &outputPath, const std::string &tmpPath) |
void | hashRedirectTargets (const std::string &bucketPath, const std::string &redirectsPath) |
void | hashToBuckets (const std::string &bucketPath, const std::string &inputPath) |
void | sortCorpusFiles (const std::string &outputPath, const std::string &preSortPath, const std::string &inputPath) |
Private Types |
typedef indri::utility::HashTable< char *, url_entry *, strhash, strcompst > | UrlEntryTable |
typedef indri::utility::HashTable< char *, std::vector< url_entry * >, strhash, strcompst > | UrlEntryVectorTable |
Private Member Functions |
url_entry * | _newUrlEntry (const char *url, const char *corpusPath, const char *docNo) |
void | _deleteUrlEntry (void *buffer) |
void | _readLinks (UrlEntryTable &urlTable, std::ifstream &linkIn) |
void | _readRedirects (UrlEntryTable &urlTable, const std::string &redirectPath, int number) |
void | _writeCorpusTable (UrlEntryVectorTable &corpusTable, const std::string &outputPath) |
void | _hashToCorpusTable (UrlEntryVectorTable &corpusTable, UrlEntryTable &urlTable) |
void | _openWriteBuckets (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets, const std::string &path, int bins) |
void | _flushWriteBuffer (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets, bool force, int i) |
void | _flushWriteBuffers (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets, bool force) |
void | _closeWriteBuckets (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets) |
void | _openReadBuckets (std::vector< std::ifstream * > &buckets, const std::string &path, int bins) |
void | _readDocBucket (UrlEntryTable &urlTable, std::ifstream &docIn) |
int | hashString (const char *str) |
void | hashToBuckets (std::ifstream &in, const std::string &path) |
void | createBuckets (const std::string &tmpPath) |
void | closeBuckets () |
void | combineBucket (const std::string &outputPath, const std::string &tmpPath, int bucket) |
void | hashToBuckets (const std::string &inputPath) |
void | combineRedirectDestinationBucket (const std::string &tmpPath, int i, std::vector< std::stringstream * > &outBuffers, std::vector< std::ofstream * > &outputFiles) |
Private Attributes |
std::vector< std::ofstream * > | _docBucketFiles |
std::vector< std::ofstream * > | _linkBucketFiles |
std::vector< std::stringstream * > | _docBuckets |
std::vector< std::stringstream * > | _linkBuckets |
int | _bins |