00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef INDRI_DOCEXTENTLISTMEMORYBUILDER_HPP
00019 #define INDRI_DOCEXTENTLISTMEMORYBUILDER_HPP
00020
00021 #include "indri/indri-platform.h"
00022 #include "indri/greedy_vector"
00023 #include "indri/DocExtentListIterator.hpp"
00024 #include <utility>
00025 #include "RVLCompress.hpp"
00026 #include "IndexTypes.hpp"
00027
00028 namespace indri {
00029 namespace index {
/// One buffer segment tracked by DocExtentListMemoryBuilder: a plain bundle
/// of three raw character pointers. The struct neither allocates nor frees
/// anything itself; it only records the pointers it is given.
struct DocExtentListMemoryBuilderSegment {
  /// Stores the three pointers verbatim.
  /// @param b value recorded in `base`
  /// @param d value recorded in `data`
  /// @param c value recorded in `capacity`
  DocExtentListMemoryBuilderSegment( char* b, char* d, char* c )
    : base( b ),
      data( d ),
      capacity( c )
  {
  }

  // NOTE(review): presumably base = start of the buffer, data = end of the
  // bytes written so far, capacity = end of the allocation -- confirm
  // against the builder implementation.
  char* base;
  char* data;
  char* capacity;
};
00041
// Iterator type for the extent data held by a DocExtentListMemoryBuilder.
// Derives publicly from DocExtentListIterator; only declarations are visible
// in this header, the member definitions live elsewhere.
00042 class DocExtentListMemoryBuilderIterator : public DocExtentListIterator {
// Pointer to a (const) vector of segments and a const_iterator into a vector
// of the same type -- presumably the builder's segment list and the segment
// currently being read; confirm in the implementation.
00043 const indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 >* _lists;
00044 indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 >::const_iterator _current;
// Entry storage; presumably what currentEntry() returns a pointer to -- TODO confirm.
00045 indri::index::DocExtentListIterator::DocumentExtentData _data;
00046
// Read-only byte range -- presumably the unread portion of the current segment.
00047 const char* _list;
00048 const char* _listEnd;
// Mode flags matching the numeric/ordinal/parental arguments of reset().
// NOTE(review): names suggest they select which optional per-extent fields
// are present in the stored data -- verify against the decoder.
00049 bool _numeric;
00050 bool _ordinal;
00051 bool _parental;
// Presumably the value reported by finished().
00052 bool _finished;
00053
00054 public:
// Re-targets the iterator, either at an explicit segment vector plus mode
// flags or at a builder.
00055 void reset( const indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 >& lists, bool numeric, bool ordinal, bool parental );
00056 void reset( class DocExtentListMemoryBuilder& builder );
00057
// Constructs an iterator from a builder; note this takes a const reference
// while reset( builder ) takes a non-const one.
00058 DocExtentListMemoryBuilderIterator( const class DocExtentListMemoryBuilder& builder );
00059
// Iteration protocol -- presumably overriding the DocExtentListIterator
// interface (the base class is not visible here):
//   startIteration()         begin (or restart) a pass over the data
//   finished()               query whether the pass is complete
//   nextEntry( documentID )  advance relative to the given document ID;
//                            exact seek semantics are defined in the .cpp
//   nextEntry()              advance by one entry
//   currentEntry()           pointer to the current DocumentExtentData
00060 void startIteration();
00061 bool finished() const;
00062 bool nextEntry( lemur::api::DOCID_T documentID );
00063 bool nextEntry();
00064 indri::index::DocExtentListIterator::DocumentExtentData* currentEntry();
00065 };
00066
// In-memory builder for a document extent list. Locations are added through
// addLocation() and can be read back through an iterator obtained from
// getIterator(). Only declarations are visible in this header.
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment; if the destructor releases the segment buffers, copying a
// builder would leave two owners of the same memory -- confirm in the .cpp.
00067 class DocExtentListMemoryBuilder {
00068 public:
// Iterator type associated with this builder.
00069 typedef DocExtentListMemoryBuilderIterator iterator;
00070
// NOTE: everything from here to the second `public:` label is also public,
// because no other access specifier intervenes.
// Counters; presumably the values returned by documentFrequency() and
// extentFrequency().
00071 int _documentFrequency;
00072 int _extentFrequency;
00073
// Vector of buffer segments (same type the iterator's reset() accepts).
00074 indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 > _lists;
00075
// Raw pointers into a character buffer -- presumably write cursor, start and
// end of the segment currently being filled; TODO confirm.
00076 char* _list;
00077 char* _listBegin;
00078 char* _listEnd;
00079
// State remembered from previous additions -- presumably used to
// delta-encode successive values; verify against addLocation().
00080 int _lastLocation;
00081 lemur::api::DOCID_T _lastDocument;
00082 int _lastOrdinal;
00083 int _lastExtentFrequency;
00084
// Pointers into the buffer -- presumably to the current document's header
// and to its extent-count slot so they can be patched later; TODO confirm.
00085 char* _documentPointer;
00086 char* _locationCountPointer;
00087
// Mode flags, set from the constructor arguments of the same names.
00088 bool _numeric;
00089 bool _ordinal;
00090 bool _parental;
00091
// Internal helpers (names carry a leading underscore but are publicly
// accessible, see note above). The first three take the same extent
// description as addLocation(); _growAddLocation additionally takes a size.
// Presumably: size computation, add-when-space-is-available, and
// grow-then-add respectively -- confirm in the implementation.
00092 inline size_t _compressedSize( lemur::api::DOCID_T documentID, int begin, int end, INT64 number, int ordinal, int parent );
00093 inline void _safeAddLocation( lemur::api::DOCID_T documentID, int begin, int end, INT64 number, int ordinal, int parent );
00094 void _growAddLocation( lemur::api::DOCID_T documentID, int begin, int end, INT64 number, int ordinal, int parent, size_t newDataSize );
00095 size_t _roundUp( size_t amount );
00096 void _grow();
00097 void _terminateDocument();
00098
00099 public:
// The three flags are fixed at construction time.
00100 DocExtentListMemoryBuilder( bool numeric, bool ordinal, bool parental );
00101 ~DocExtentListMemoryBuilder();
00102
// Adds one extent [begin, end] for documentID. number, ordinal and parent
// are optional and default to 0.
00103 void addLocation( lemur::api::DOCID_T documentID, int begin, int end, INT64 number = 0, int ordinal = 0, int parent = 0 );
00104
// clear() presumably discards accumulated data; empty() reports whether any
// is held. Note empty() is not declared const.
00105 void clear();
00106 bool empty();
00107
// Read-only accessors for the frequency counters and memory footprint.
00108 int documentFrequency() const;
00109 int extentFrequency() const;
00110 size_t memorySize() const;
00111
// flush() presumably finalizes any pending document before the data is read.
// getIterator() returns a raw iterator pointer.
// NOTE(review): ownership of the returned pointer is not visible here --
// check whether callers are expected to delete it.
00112 void flush();
00113 iterator* getIterator();
00114 };
00115 }
00116 }
00117
00118 #endif // INDRI_DOCEXTENTLISTMEMORYBUILDER_HPP