00001 /*========================================================================== 00002 * Copyright (c) 2001 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 /* 00013 10/18/2002 -- dmf Add binReadC, binWriteC, deltaDecode, and deltEncode 00014 for compression of TermInfoLists. 00015 */ 00016 00017 #ifndef _INVFPTERMLIST_HPP 00018 #define _INVFPTERMLIST_HPP 00019 00020 #include "common_headers.hpp" 00021 #include "InvFPTerm.hpp" 00022 #include "InvFPTypes.hpp" 00023 #include "RVLCompress.hpp" 00024 00025 namespace lemur 00026 { 00027 namespace file 00028 { 00029 class File; 00030 } 00031 } 00032 00033 namespace lemur 00034 { 00035 namespace index 00036 { 00042 class InvFPTermList : public lemur::api::TermInfoList { 00043 public: 00044 InvFPTermList(); 00045 InvFPTermList(lemur::api::DOCID_T did, int len, vector<LocatedTerm> &tls); 00046 ~InvFPTermList(); 00047 00049 void startIteration() const; 00050 00052 bool hasMore() const; 00053 00055 lemur::api::TermInfo *nextEntry() const; 00056 00058 virtual int size(); 00059 00062 virtual lemur::api::TermInfo* operator[](int index) const; 00063 00065 lemur::api::COUNT_T docLength() const{ return length; } 00066 00068 lemur::api::COUNT_T termCount() const{ return listlen; } 00069 00071 lemur::api::DOCID_T docID() const{ return uid; } 00072 00075 bool binRead(ifstream& infile); 00077 bool binReadC(ifstream& infile); 00079 void binWriteC(ofstream& ofile); 00080 00081 bool binReadC( lemur::file::File& infile ); 00082 void binWriteC( lemur::file::File& outfile ); 00083 00086 virtual void deltaDecode(); 00089 virtual void deltaEncode(); 00090 00092 void countTerms(); 00093 00094 protected: 00095 // Helper functions for iterator, subclasses should override 00097 virtual lemur::api::TermInfo* newElement() const { return new InvFPTerm(); } 00099 virtual lemur::api::TermInfo* getElement(lemur::api::TermInfo* elem, lemur::api::POS_T position) const; 00101 virtual void assignElement(lemur::api::TermInfo* to, lemur::api::TermInfo* from) const { 00102 *static_cast<InvFPTerm*>(to) = *static_cast<InvFPTerm*>(from); 00103 } 00105 virtual lemur::api::POS_T beginPosition() const { return (lemur::api::POS_T) 0; } 00107 virtual lemur::api::POS_T endPosition() const { return (lemur::api::POS_T) listlen; } 00109 virtual lemur::api::POS_T nextPosition(lemur::api::POS_T position) const; 00110 00111 lemur::api::DOCID_T uid; // this doc's id 00112 lemur::api::COUNT_T length; // length of this document (terms + stopwords) 00113 LocatedTerm* list; // list of terms and locations 00114 LLTerm* listcounted; // list of terms and location lists 00115 lemur::api::COUNT_T listlen; // number of items we have in list (same as number of terms) 00116 mutable int index; // index for iterator 00117 lemur::api::LOC_T* counts; // keep track of counts of terms for bag of word 00118 mutable InvFPTerm entry; 00119 mutable vector<lemur::api::LOC_T> loclist; //list of locations to return 00120 00121 }; 00122 } 00123 } 00124 #endif