00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // 00013 // MemoryIndexDocListFileIterator 00014 // 00015 // 23 November 2004 -- tds 00016 // 00017 00018 #ifndef INDRI_MEMORYINDEXDOCLISTFILEITERATOR_HPP 00019 #define INDRI_MEMORYINDEXDOCLISTFILEITERATOR_HPP 00020 00021 #include "indri/Mutex.hpp" 00022 #include "indri/TermData.hpp" 00023 #include "indri/DocListFileIterator.hpp" 00024 #include "indri/DocListMemoryBuilder.hpp" 00025 #include <algorithm> 00026 #include <iostream> // DEBUG 00027 00028 namespace indri { 00029 namespace index { 00030 class MemoryIndexDocListFileIterator : public DocListFileIterator { 00031 private: 00032 const std::vector<MemoryIndex::term_entry*>& _termData; 00033 std::vector<MemoryIndex::term_entry*> _alphabetical; 00034 std::vector<MemoryIndex::term_entry*>::iterator _currentTerm; 00035 DocListMemoryBuilderIterator _iterator; 00036 DocListData _data; 00037 bool _finished; 00038 00039 public: 00040 MemoryIndexDocListFileIterator( const std::vector<MemoryIndex::term_entry*>& termData ) : 00041 _termData(termData) 00042 { 00043 } 00044 00045 void startIteration() { 00046 _finished = false; 00047 _alphabetical.clear(); 00048 _alphabetical.reserve( _termData.size() ); 00049 00050 for( size_t i=0; i<_termData.size(); i++ ) { 00051 _alphabetical.push_back( _termData[i] ); 00052 } 00053 00054 std::sort( _alphabetical.begin(), _alphabetical.end(), MemoryIndex::term_entry::term_less() ); 00055 00056 _currentTerm = _alphabetical.begin(); 00057 _data.termData = 0; 00058 _data.iterator = 0; 00059 00060 if( _currentTerm != _alphabetical.end() ) { 00061 _data.termData = (*_currentTerm)->termData; 00062 _data.iterator = &_iterator; 00063 _iterator.reset( (*_currentTerm)->list, _data.termData ); 00064 00065 assert( (*_currentTerm)->list.documentFrequency() == _data.termData->corpus.documentCount ); 00066 assert( (*_currentTerm)->list.termFrequency() == _data.termData->corpus.totalCount ); 00067 } else { 00068 _finished = true; 00069 } 00070 } 00071 00072 bool finished() const { 00073 return _finished; 00074 } 00075 00076 DocListData* currentEntry() { 00077 if( !_finished ) 00078 return &_data; 00079 00080 return 0; 00081 } 00082 00083 const DocListData* currentEntry() const { 00084 if( !_finished ) 00085 return &_data; 00086 00087 return 0; 00088 } 00089 00090 bool nextEntry() { 00091 if( _finished ) 00092 return false; 00093 _currentTerm++; 00094 00095 if( _currentTerm == _alphabetical.end() ) { 00096 _finished = true; 00097 return false; 00098 } 00099 00100 _data.termData = (*_currentTerm)->termData; 00101 _iterator.reset( (*_currentTerm)->list, _data.termData ); 00102 00103 assert( (*_currentTerm)->list.documentFrequency() == _data.termData->corpus.documentCount ); 00104 assert( (*_currentTerm)->list.termFrequency() == _data.termData->corpus.totalCount ); 00105 return true; 00106 } 00107 }; 00108 } 00109 } 00110 00111 #endif // INDRI_MEMORYINDEXDOCLISTFILEITERATOR_HPP 00112