Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TermTranslator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2005 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // TermTranslator
00014 //
00015 // 14 January 2005 -- tds
00016 //
00017 
00018 #ifndef INDRI_TERMTRANSLATOR_HPP
00019 #define INDRI_TERMTRANSLATOR_HPP
00020 
00021 #include "indri/HashTable.hpp"
00022 #include <vector>
00023 #include "indri/TermBitmap.hpp"
00024 
00025 namespace indri {
00026   namespace index {
00027     class TermTranslator {
00028     private:
00029       TermBitmap* _bitmap;
00030       int _previousFrequentCount;
00031       int _currentFrequentCount;
00032       int _previousTermCount;
00033       int _currentTermCount;
00034 
00035       std::vector<lemur::api::TERMID_T>* _frequentMap;
00036       indri::utility::HashTable<lemur::api::TERMID_T, lemur::api::TERMID_T>* _wasInfrequentMap;
00037 
00038     public:
00039       ~TermTranslator() {
00040         delete _frequentMap;
00041       }
00042 
00043       TermTranslator( int previousFrequentCount,
00044                       int currentFrequentCount,
00045                       int previousTermCount,
00046                       int currentTermCount,
00047                       std::vector<lemur::api::TERMID_T>* frequentMap,
00048                       indri::utility::HashTable<lemur::api::TERMID_T, lemur::api::TERMID_T>* wasInfrequentMap,
00049                       TermBitmap* bitmap ) 
00050         :
00051         _bitmap(bitmap),
00052         _frequentMap(frequentMap),
00053         _wasInfrequentMap(wasInfrequentMap)
00054       {
00055         assert( currentFrequentCount >= 0 );
00056         assert( previousFrequentCount >= 0 );
00057         assert( previousTermCount >= 0 );
00058         assert( currentTermCount >= 0 );
00059 
00060         _previousFrequentCount = previousFrequentCount;
00061         _currentFrequentCount = currentFrequentCount;
00062         _previousTermCount = previousTermCount;
00063         _currentTermCount = currentTermCount;
00064       }
00065 
00066       lemur::api::TERMID_T operator() ( lemur::api::TERMID_T termID ) {
00067         assert( termID >= 0 );
00068         assert( termID <= _previousTermCount );
00069         lemur::api::TERMID_T result = 0;
00070         lemur::api::TERMID_T* value;
00071 
00072         if( termID <= _previousFrequentCount ) {
00073           // common case, termID is a frequent term
00074           assert( _frequentMap->size() > termID );
00075           result = (*_frequentMap)[termID];
00076         } else {
00077           // term may have become frequent, so check the wasInfrequentMap
00078           value = (*_wasInfrequentMap).find( termID - _previousFrequentCount - 1 );
00079 
00080           if( value ) {
00081             result = *value;
00082           } else {
00083             // term wasn't frequent and isn't now either, so get it from the bitmap
00084             result = 1 + _currentFrequentCount + _bitmap->get( termID - _previousFrequentCount - 1 );
00085           }
00086         }
00087 
00088         assert( result >= 0 );
00089         assert( result <= _currentTermCount );
00090         return result;
00091       }
00092     };
00093   }
00094 }
00095 
00096 #endif // INDRI_TERMTRANSLATOR_HPP
00097 
00098 
00099 

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4