#include <WARCDocumentIterator.hpp>
Public Member Functions
| WARCRecord (gzFile &in, indri::utility::Buffer &buf) |
| ~WARCRecord () |
std::string | getWarcType () |
std::string | getUUID () |
std::string | getTrecID () |
std::string | getTargetURI () |
const char * | getHeader () |
const char * | getContent () |
std::string | getMetadata (const char *key) |
bool | readRecord () |
Static Public Attributes
const char * | WARCTYPE = "WARC-Type" |
const char * | WARCRECORDID = "WARC-Record-ID" |
const char * | CONTENTLENGTH = "Content-Length" |
const char * | WARCTARGETURI = "WARC-Target-URI" |
const char * | WARCTRECID = "WARC-TREC-ID" |
Private Member Functions
bool | _readLine (char *&beginLine, size_t &lineLength) |
bool | readHeader () |
bool | readContent () |
Private Attributes
std::string | warcType |
std::string | uuid |
std::string | trecID |
std::string | targetURI |
int | contentLength |
indri::utility::HashTable< std::string, std::string > | metadata |
std::string | header |
const char * | content |
gzFile & | _gzin |
indri::utility::Buffer & | _buffer |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|