00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 // 00013 // ListCache 00014 // 00015 // 24 September 2004 -- tds 00016 // 00017 // Stores previously used precomputed lists. 00018 // 00019 00020 #ifndef INDRI_LISTCACHE_HPP 00021 #define INDRI_LISTCACHE_HPP 00022 00023 #include <vector> 00024 #include "indri/delete_range.hpp" 00025 #include "indri/SimpleCopier.hpp" 00026 #include "indri/DocumentCount.hpp" 00027 #include "indri/QuerySpec.hpp" 00028 #include "indri/TreePrinterWalker.hpp" 00029 #include "indri/Parameters.hpp" 00030 namespace indri 00031 { 00032 namespace lang 00033 { 00034 00035 class ListCache { 00036 public: 00037 struct CachedList { 00038 // query structure 00039 indri::lang::SimpleCopier raw; 00040 indri::lang::SimpleCopier context; 00041 00042 // postings 00043 indri::utility::greedy_vector<indri::index::DocumentContextCount> entries; 00044 00045 // statistics about the entries 00046 INT64 occurrences; 00047 INT64 contextSize; 00048 INT64 minimumContextSize; 00049 INT64 maximumContextSize; 00050 INT64 maximumOccurrences; 00051 float maximumContextFraction; 00052 }; 00053 00054 private: 00055 std::vector<struct CachedList*> _lists; 00056 00057 public: 00058 ~ListCache() { 00059 indri::utility::delete_vector_contents( _lists ); 00060 } 00061 00062 void add( CachedList* list ) { 00063 if( _lists.size() > 100 ) { 00064 delete _lists[0]; 00065 _lists.erase( _lists.begin() ); 00066 } 00067 00068 _lists.push_back( list ); 00069 } 00070 00071 CachedList* find( indri::lang::Node* raw, indri::lang::Node* context ) { 00072 ListCache::CachedList* list = 0; 00073 size_t i = 0; 00074 00075 // TODO: use a hash function to make this faster 00076 for( i=0; i<_lists.size(); i++ ) { 00077 indri::lang::Node* cachedRaw = _lists[i]->raw.root(); 00078 indri::lang::Node* cachedContext = _lists[i]->context.root(); 00079 00080 if( *cachedRaw == *raw ) { 00081 if( ( !cachedContext && !context ) || 00082 ( cachedContext && context && (*context == *cachedContext)) ) { 00083 list = _lists[i]; 00084 break; 00085 } 00086 } 00087 } 00088 00089 return list; 00090 } 00091 }; 00092 } 00093 } 00094 00095 #endif // INDRI_LISTCACHE_HPP 00096 00097