00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // 00013 // MemoryIndexTermListFileIterator 00014 // 00015 // 24 November 2004 -- tds 00016 // 00017 00018 #ifndef INDRI_MEMORYINDEXTERMLISTFILEITERATOR_HPP 00019 #define INDRI_MEMORYINDEXTERMLISTFILEITERATOR_HPP 00020 00021 #include <list> 00022 #include <vector> 00023 #include "indri/Buffer.hpp" 00024 #include "indri/indri-platform.h" 00025 #include "indri/DocumentData.hpp" 00026 #include "indri/TermList.hpp" 00027 #include "indri/TermListFileIterator.hpp" 00028 00029 namespace indri { 00030 namespace index { 00031 class MemoryIndexTermListFileIterator : public TermListFileIterator { 00032 private: 00033 // the buffers contain compressed TermLists, packed together in 1MB memory regions 00034 // the _buffersIterator points to the last buffer we looked at, while _bufferBase 00035 // contains the number of bytes in all previous buffers 00036 std::list<indri::utility::Buffer*>& _buffers; 00037 std::list<indri::utility::Buffer*>::iterator _buffersIterator; 00038 UINT64 _bufferBase; 00039 bool _finished; 00040 00041 std::vector<DocumentData>& _data; 00042 TermList _list; 00043 int _index; // index into _documentData of the current document 00044 00045 public: 00046 MemoryIndexTermListFileIterator( std::list<indri::utility::Buffer*>& buffers, std::vector<DocumentData>& data ); 00047 00048 void startIteration(); 00049 bool nextEntry(); 00050 bool nextEntry( lemur::api::DOCID_T documentID ); 00051 bool finished(); 00052 TermList* currentEntry(); 00053 }; 00054 } 00055 } 00056 00057 #endif // INDRI_MEMORYINDEXTERMLISTFILEITERATOR_HPP 00058