00001 /*========================================================================== 00002 * Copyright (c) 2005 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // 00013 // DiskFrequentVocabularyIterator 00014 // 00015 // 19 January 2005 -- tds 00016 // 00017 00018 #ifndef INDRI_DISKFREQUENTVOCABULARYITERATOR_HPP 00019 #define INDRI_DISKFREQUENTVOCABULARYITERATOR_HPP 00020 00021 #include "indri/File.hpp" 00022 #include "indri/Buffer.hpp" 00023 #include "indri/TermData.hpp" 00024 #include "indri/VocabularyIterator.hpp" 00025 #include "indri/RVLDecompressStream.hpp" 00026 00027 namespace indri { 00028 namespace index { 00029 class DiskFrequentVocabularyIterator : public VocabularyIterator { 00030 private: 00031 indri::file::File& _file; 00032 indri::utility::RVLDecompressStream _stream; 00033 indri::utility::Buffer _buffer; 00034 const char* _current; 00035 int _fieldCount; 00036 bool _finished; 00037 00038 DiskTermData* _data; 00039 char *_dataBuffer; 00040 00041 // this tells us if the last nextEntry() came from 00042 // a start iteration or not - needed for nextEntry(const char*) 00043 // call 00044 bool _justStartedIteration; 00045 00046 public: 00047 DiskFrequentVocabularyIterator( indri::file::File& frequentTermsData, int fieldCount ); 00048 ~DiskFrequentVocabularyIterator() { delete[] _dataBuffer; }; 00049 00050 void startIteration(); 00051 bool finished(); 00052 bool nextEntry(); 00053 bool nextEntry(const char *skipTo); 00054 DiskTermData* currentEntry(); 00055 }; 00056 } 00057 } 00058 00059 #endif // INDRI_DISKFREQUENTVOCABULARYITERATOR_HPP 00060