00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // 00013 // DiskDocListIterator 00014 // 00015 // 10 December 2004 -- tds 00016 // 00017 00018 #ifndef INDRI_DISKDOCLISTITERATOR_HPP 00019 #define INDRI_DISKDOCLISTITERATOR_HPP 00020 00021 #include "indri/DocListIterator.hpp" 00022 #include "indri/SequentialReadBuffer.hpp" 00023 #include "Keyfile.hpp" 00024 00025 namespace indri { 00026 namespace index { 00027 class DiskDocListIterator : public DocListIterator { 00028 private: 00029 const char* _list; 00030 const char* _listEnd; 00031 lemur::api::DOCID_T _skipDocument; 00032 00033 indri::file::SequentialReadBuffer* _file; 00034 UINT64 _startOffset; 00035 UINT64 _endOffset; 00036 bool _hasTopdocs; 00037 bool _isFrequent; 00038 00039 indri::utility::greedy_vector<TopDocument> _topdocs; 00040 DocumentData _data; 00041 DocumentData* _result; 00042 00043 TermData* _termData; 00044 bool _ownTermData; 00045 char _term[ lemur::file::Keyfile::MAX_KEY_LENGTH+1 ]; 00046 int _fieldCount; 00047 00048 void _readEntry(); 00049 void _readSkip(); 00050 void _readTopdocs(); 00051 void _readTermData( int headerLength ); 00052 00053 public: 00054 DiskDocListIterator( indri::file::SequentialReadBuffer* buffer, UINT64 startOffset, int fieldCount ); 00055 ~DiskDocListIterator(); 00056 void setStartOffset( UINT64 startOffset, TermData* termData ); 00057 00058 const indri::utility::greedy_vector<TopDocument>& topDocuments(); 00059 00060 void startIteration(); 00061 bool nextEntry(); 00062 bool nextEntry( lemur::api::DOCID_T documentID ); 00063 DocumentData* currentEntry(); 00064 bool finished(); 00065 bool isFrequent() const; 00066 TermData* termData(); 00067 }; 00068 } 00069 } 00070 00071 #endif // INDRI_DISKDOCLISTITERATOR_HPP 00072