Lemur: ListCache.hpp Source File

00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // ListCache
00014 //
00015 // 24 September 2004 -- tds
00016 //
00017 // Stores previously used precomputed lists.
00018 //
00019 
00020 #ifndef INDRI_LISTCACHE_HPP
00021 #define INDRI_LISTCACHE_HPP
00022 
00023 #include <vector>
00024 #include "indri/delete_range.hpp"
00025 #include "indri/SimpleCopier.hpp"
00026 #include "indri/DocumentCount.hpp"
00027 #include "indri/QuerySpec.hpp"
00028 #include "indri/TreePrinterWalker.hpp"
00029 #include "indri/Parameters.hpp"
00030 namespace indri
00031 {
00032   namespace lang
00033   {
00034     
00035     class ListCache {
00036     public:
00037       struct CachedList {
00038         // query structure
00039         indri::lang::SimpleCopier raw;
00040         indri::lang::SimpleCopier context;
00041 
00042         // postings
00043         indri::utility::greedy_vector<indri::index::DocumentContextCount> entries;
00044 
00045         // statistics about the entries
00046         INT64 occurrences;
00047         INT64 contextSize;
00048         INT64 minimumContextSize;
00049         INT64 maximumContextSize;
00050         INT64 maximumOccurrences;
00051         float maximumContextFraction;
00052       };
00053 
00054     private:
00055       std::vector<struct CachedList*> _lists;
00056 
00057     public:
00058       ~ListCache() {
00059         indri::utility::delete_vector_contents( _lists );
00060       }
00061 
00062       void add( CachedList* list ) {
00063         if( _lists.size() > 100 ) {
00064           delete _lists[0];
00065           _lists.erase( _lists.begin() );
00066         }
00067 
00068         _lists.push_back( list );
00069       }
00070 
00071       CachedList* find( indri::lang::Node* raw, indri::lang::Node* context ) {
00072         ListCache::CachedList* list = 0;
00073         size_t i = 0;
00074 
00075         // TODO: use a hash function to make this faster
00076         for( i=0; i<_lists.size(); i++ ) {
00077           indri::lang::Node* cachedRaw = _lists[i]->raw.root();
00078           indri::lang::Node* cachedContext = _lists[i]->context.root();
00079 
00080           if( *cachedRaw == *raw ) {
00081             if( ( !cachedContext && !context ) ||
00082                 ( cachedContext && context && (*context == *cachedContext)) ) {
00083               list = _lists[i];
00084               break;
00085             } 
00086           }
00087         }
00088 
00089         return list;
00090       }
00091     };
00092   }
00093 }
00094 
00095 #endif // INDRI_LISTCACHE_HPP
00096 
00097