00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _INVDOCLIST_HPP
00014 #define _INVDOCLIST_HPP
00015
00016
00017
00018
00019
00020
00021 #include <cmath>
00022 #include "InvFPTypes.hpp"
00023 #include "common_headers.hpp"
00024 #include "DocInfoList.hpp"
00025 #include "MemCache.hpp"
00026 #include "RVLCompress.hpp"
00027
00028 extern "C" {
00029 #include <cstdio>
00030 }
00031
// Preprocessor constant with value 9. It is not referenced anywhere in this
// header, so its meaning cannot be determined from here.
// NOTE(review): the name is very generic and, being a macro, leaks into every
// translation unit that includes this header — confirm it is still needed.
00032 #define DEFAULT 9
00033 namespace lemur
00034 {
00035 namespace index
00036 {
00037
00038 class InvDocList: public lemur::api::DocInfoList {
00039 public:
00040 InvDocList();
00041
00044 InvDocList(lemur::api::TERMID_T id, int len);
00046 InvDocList(lemur::utility::MemCache* mc, lemur::api::TERMID_T id, int len);
00047 InvDocList(lemur::utility::MemCache* mc, lemur::api::TERMID_T id, int len,
00048 lemur::api::DOCID_T docid, lemur::api::LOC_T location);
00050 InvDocList(lemur::api::TERMID_T id, int listlen,
00051 lemur::api::LOC_T* list, int fr,
00052 lemur::api::DOCID_T* ldocid, int len);
00053 ~InvDocList();
00054
00060 void setList(lemur::api::TERMID_T id, int listlen,
00061 lemur::api::LOC_T* list, int fr,
00062 lemur::api::DOCID_T* ldocid=NULL, int len=0);
00063
00067 void setListSafe(lemur::api::TERMID_T id, int listlen,
00068 lemur::api::LOC_T* list, int fr,
00069 lemur::api::DOCID_T* ldocid, int len);
00070
00074 void reset();
00075
00078 void resetFree();
00079
00080 bool allocMem();
00081 bool hasNoMem();
00082
00084 virtual bool addTerm(lemur::api::DOCID_T docid);
00085
00087 virtual bool append(InvDocList* tail);
00088
00089 virtual void startIteration() const;
00090 virtual bool hasMore() const;
00091 virtual lemur::api::DocInfo* nextEntry() const;
00092 virtual void nextEntry(lemur::api::DocInfo* info) const;
00093
00094 lemur::api::DOCID_T curDocID() const{
00095 if (lastid == NULL) return -1; return *lastid;
00096 };
00097 lemur::api::COUNT_T docFreq() const{ return df; };
00098 int length() const{ return end-begin; };
00099 lemur::api::TERMID_T termID() const{ return uid; };
00100 int termLen() const{ return strlength; };
00101 virtual lemur::api::COUNT_T termCTF() const;
00102 int curDocIDdiff() const{ return lastid-begin; };
00103 int curDocIDtf() const{ return *(lastid+1); };
00104 int memorySize() const{ return size; };
00105
00107 void binWrite(ofstream& of);
00108
00110 bool binRead(ifstream& inf);
00111
00113 void binWriteC(ofstream& of);
00114
00116 bool binReadC(ifstream& inf);
00117
00118 protected:
00119
00121 virtual lemur::api::DocInfo* getElement(lemur::api::DocInfo* elem,
00122 lemur::api::POS_T position) const;
00124 virtual lemur::api::POS_T beginPosition() const { return (lemur::api::POS_T) 0; }
00126 virtual lemur::api::POS_T endPosition() const { return (lemur::api::POS_T) (end - begin); }
00128 virtual lemur::api::POS_T nextPosition(lemur::api::POS_T position) const;
00129
00133 bool getMoreMem();
00134 int logb2(int num);
00135
00138 virtual void deltaEncode();
00139
00142 virtual void deltaDecode();
00143
00144
00145 lemur::api::LOC_T* begin;
00146 lemur::api::LOC_T* lastid;
00147 lemur::api::LOC_T* freq;
00148 lemur::api::LOC_T * end;
00149 mutable lemur::api::LOC_T* iter;
00150 int size;
00151 int LOC_Tsize;
00152 int strlength;
00153 lemur::api::TERMID_T uid;
00154 lemur::api::COUNT_T df;
00155 lemur::utility::MemCache* cache;
00156 bool hascache;
00157
00158 bool READ_ONLY;
00159 private:
00160 mutable lemur::api::DocInfo entry;
00161 };
00162 }
00163 }
00164
00165 #endif