00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // 00013 // MemoryIndexVocabularyIterator 00014 // 00015 // 23 November 2004 -- tds 00016 // 00017 00018 #ifndef INDRI_MEMORYINDEXVOCABULARYITERATOR_HPP 00019 #define INDRI_MEMORYINDEXVOCABULARYITERATOR_HPP 00020 00021 #include "indri/Mutex.hpp" 00022 #include "indri/TermData.hpp" 00023 #include "indri/DiskTermData.hpp" 00024 00025 namespace indri { 00026 namespace index { 00027 class MemoryIndexVocabularyIterator : public VocabularyIterator { 00028 private: 00029 typedef std::vector<MemoryIndex::term_entry*> VTermEntry; 00030 VTermEntry& _termData; 00031 VTermEntry::iterator _iterator; 00032 DiskTermData _diskTermData; 00033 00034 // this tells us if the last nextEntry() came from 00035 // a start iteration or not - needed for nextEntry(const char*) 00036 // call 00037 bool _justStartedIteration; 00038 00039 public: 00040 MemoryIndexVocabularyIterator( VTermEntry& termData ) : 00041 _termData(termData) 00042 { 00043 } 00044 00045 void startIteration() { 00046 _iterator = _termData.begin(); 00047 00048 _diskTermData.length = 0; 00049 _diskTermData.startOffset = 0; 00050 00051 if( _iterator != _termData.end() ) { 00052 _diskTermData.termData = (*_iterator)->termData; 00053 _diskTermData.termID = (*_iterator)->termID; 00054 } 00055 00056 _justStartedIteration=true; 00057 } 00058 00059 DiskTermData* currentEntry() { 00060 if( _iterator == _termData.end() ) 00061 return 0; 00062 00063 return &_diskTermData; 00064 } 00065 00066 bool nextEntry() { 00067 if( finished() ) 00068 return false; 00069 00070 _iterator++; 00071 00072 if( finished() ) 00073 return false; 00074 00075 _diskTermData.termID++; 00076 _diskTermData.termData = (*_iterator)->termData; 00077 return true; 00078 } 00079 00080 bool nextEntry(const char *skipTo) { 00081 assert(skipTo!=NULL); 00082 00083 int termLength=strlen(skipTo); 00084 if (!termLength) { 00085 startIteration(); 00086 return true; 00087 } 00088 00089 if (!_justStartedIteration) { 00090 _iterator++; 00091 } 00092 00093 _justStartedIteration=false; 00094 00095 while (_iterator!=_termData.end()) { 00096 00097 if (strstr((*_iterator)->term, skipTo)==(*_iterator)->term) { 00098 return true; 00099 } 00100 _iterator++; 00101 } 00102 00103 // return false... 00104 return false; 00105 } 00106 00107 bool finished() { 00108 return _iterator == _termData.end(); 00109 } 00110 }; 00111 } 00112 } 00113 00114 #endif // INDRI_MEMORYINDEXVOCABULARYITERATOR_HPP 00115