#include <WARCDocumentIterator.hpp>
Public Member Functions | |
| | WARCRecord (gzFile &in, indri::utility::Buffer &buf) |
| | ~WARCRecord () |
| std::string | getWarcType () |
| std::string | getUUID () |
| std::string | getTrecID () |
| std::string | getTargetURI () |
| const char * | getHeader () |
| const char * | getContent () |
| std::string | getMetadata (const char *key) |
| bool | readRecord () |
Static Public Attributes | |
| const char * | WARCTYPE = "WARC-Type" |
| const char * | WARCRECORDID = "WARC-Record-ID" |
| const char * | CONTENTLENGTH = "Content-Length" |
| const char * | WARCTARGETURI = "WARC-Target-URI" |
| const char * | WARCTRECID = "WARC-TREC-ID" |
Private Member Functions | |
| bool | _readLine (char *&beginLine, size_t &lineLength) |
| bool | readHeader () |
| bool | readContent () |
Private Attributes | |
| std::string | warcType |
| std::string | uuid |
| std::string | trecID |
| std::string | targetURI |
| int | contentLength |
| indri::utility::HashTable< std::string, std::string > | metadata |
| std::string | header |
| const char * | content |
| gzFile & | _gzin |
| indri::utility::Buffer & | _buffer |
|
||||||||||||
|
|
|
|
|
|
||||||||||||
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1.3.4