00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef INDRI_BULKTREE_HPP
00019 #define INDRI_BULKTREE_HPP
00020
00021 #include <vector>
00022 #include "indri/File.hpp"
00023 #include "indri/SequentialWriteBuffer.hpp"
00024 #include "indri/HashTable.hpp"
00025 namespace indri
00026 {
00028 namespace file
00029 {
00030
// BulkBlock: one block of key/value entries backed by a raw character buffer.
// Each block carries a numeric ID and previous/next pointers to other blocks.
// Only declarations are visible in this header; the buffer layout and every
// method body are defined elsewhere.
// NOTE(review): the leaf flag and the previous/next links suggest blocks are
// tree nodes that can also be chained into a list -- confirm against the
// implementation file.
00031 class BulkBlock {
00032 private:
// Backing storage for the block's contents.
// NOTE(review): presumably allocated in the constructor and released in the
// destructor; no copy constructor/assignment is declared here -- confirm that
// blocks are never copied.
00033 char* _buffer;
00034
// Neighbouring blocks and this block's numeric ID; see link()/unlink(),
// previous()/next() and setID()/getID() below.
00035 BulkBlock* _previous;
00036 BulkBlock* _next;
00037 UINT32 _id;
00038
// Internal layout helpers.
// NOTE(review): names suggest these return positions inside _buffer for the
// index-th key/value pair -- units and origin are not visible here, TODO confirm.
00039 int _remainingCapacity();
00040 int _dataEnd();
00041 int _keyEnd( int index );
00042 int _keyStart( int index );
00043 int _valueStart( int index );
00044 int _valueEnd( int index );
00045
// Internal insertion and comparison helpers. _compare takes two buffers with
// explicit lengths; the meaning of its int result is presumably
// negative/zero/positive ordering -- TODO confirm.
00046 bool _canInsert( int keyLength, int dataLength );
00047 void _storeKeyValueLength( int insertPoint, int keyLength, int valueLength );
00048 int _compare( const char* one, int oneLength, const char* two, int twoLength );
00049
// Internal lookup. `exact` is an output parameter (non-const reference);
// presumably set to true when the key matched exactly and the returned int is
// an entry index -- TODO confirm.
00050 int _find( const char* key, int keyLength, bool& exact );
00051
00052 public:
// Constructs a block; `leaf` defaults to false when omitted.
00053 BulkBlock( bool leaf = false );
00054 ~BulkBlock();
00055
// Accessors: presumably the number of stored entries and the leaf flag
// passed at construction -- TODO confirm.
00056 int count();
00057 bool leaf();
00058
// insert: adds a key/value pair given as (pointer, length) buffers. The bool
// result presumably reports whether the pair fit in the block -- TODO confirm.
00059 bool insert( const char* key, int keyLength, const char* data, int dataLength );
// insertFirstKey: takes another block and a block ID.
// NOTE(review): presumably stores block's first key mapped to blockID (as an
// interior-node entry) -- confirm.
00060 bool insertFirstKey( BulkBlock& block, UINT32 blockID );
// getIndex: reads the entry at `index` into caller-supplied buffers.
// keyLength/valueLength are presumably the buffer capacities; keyActual and
// valueActual are output parameters, presumably the stored lengths.
00061 bool getIndex( int index, char* key, int& keyActual, int keyLength, char* value, int& valueActual, int valueLength );
// findGreater / find: look up `key` and write a value into the caller's
// buffer; actualLength is an output parameter.
// NOTE(review): how the two differ on a missing key is not visible here --
// the name suggests findGreater tolerates an inexact match; confirm.
00062 bool findGreater( const char* key, int keyLength, char* value, int& actualLength, int valueBufferLength );
00063 bool find( const char* key, int keyLength, char* value, int& actualLength, int valueBufferLength );
00064
// findIndexOf: takes a key with no length argument (presumably a
// NUL-terminated string) and returns an int, presumably an entry index.
00071 int findIndexOf(const char* key);
00072
// clear(): resets the block (details not visible here). data(): returns a
// char* -- presumably _buffer. dataSize(): static, so the size it reports
// cannot depend on a particular instance.
00073 void clear();
00074 char* data();
00075 static UINT64 dataSize();
00076
00077
// Block ID accessors and list linkage.
// NOTE(review): link()/unlink() presumably only update _previous/_next on this
// block; whether neighbours are also patched is not visible -- confirm.
00078 void setID( UINT32 id );
00079 UINT32 getID();
00080 void link( BulkBlock* previous, BulkBlock* next );
00081 void unlink();
00082 BulkBlock* previous();
00083 BulkBlock* next();
00084 };
00085
00086
// BulkTreeWriter: writes key/value pairs into a tree file through a
// SequentialWriteBuffer, keeping a vector of BulkBlock pointers as working
// state. Only declarations are visible here; behaviour is defined elsewhere.
// NOTE(review): the "bulk" naming suggests keys are expected in sorted order
// -- this header does not state it; confirm before relying on it.
00087 class BulkTreeWriter {
00088 private:
// _blockID: presumably the ID to assign to the next block written.
// _flushLevel: meaning not visible in this header -- TODO confirm.
00089 UINT32 _blockID;
00090 int _flushLevel;
00091
// Internal flush helpers: one takes an index (presumably into _blocks), the
// other takes no argument.
00092 void _flush( int blockIndex );
00093 void _flushAll();
00094
00095 public:
// NOTE(review): these three data members are declared public even though they
// use the underscore naming applied to private state elsewhere in this header.
// Other code may access them directly, so do not change their visibility
// without checking callers.
00096 std::vector<BulkBlock*> _blocks;
00097 File _file;
00098 SequentialWriteBuffer _write;
00099
00100 BulkTreeWriter();
00101 ~BulkTreeWriter();
00102
// create(): takes the filename of the tree file to write. close(): presumably
// flushes pending data and closes the file -- TODO confirm.
00103 void close();
00104 void create( const std::string& filename );
00105
// put overloads: the key is given as a C string without a length (presumably
// NUL-terminated), as a UINT32, or as an explicit (pointer, length) buffer.
// The value is always a (pointer, length) buffer.
00106 void put( const char* key, const char* value, int valueLength );
00107 void put( UINT32 key, const char* value, int valueLength );
00108 void put( const char* key, int keyLength, const char* value, int valueLength );
00109
00110
// get overloads mirror put: same three key forms. `value` is a caller-supplied
// buffer, valueLength presumably its capacity, and `actual` is an output
// parameter, presumably the stored value's length. The bool result presumably
// reports whether the key was found -- TODO confirm.
00111 bool get( const char* key, int keyLength, char* value, int& actual, int valueLength );
00112 bool get( UINT32 key, char* value, int& actual, int valueLength );
00113 bool get( const char* key, char* value, int& actual, int valueLength );
00114
// Public flush entry point; its relation to the private _flush/_flushAll
// helpers is defined in the implementation file.
00115 void flush();
00116 };
00117
// BulkTreeIterator: steps through key/value entries read from a File, holding
// one BulkBlock by value plus a (block index, pair index) position.
// Only declarations are visible here; behaviour is defined elsewhere.
00118 class BulkTreeIterator {
00119 private:
// Reference member: the File it refers to must outlive this iterator.
00120 File& _file;
// NOTE(review): presumably the length of _file, captured at construction --
// TODO confirm.
00121 UINT64 _fileLength;
// Block held by value, plus the current position: _pairIndex presumably
// indexes an entry inside _block and _blockIndex a block inside the file.
00122 BulkBlock _block;
00123 int _pairIndex;
00124 UINT64 _blockIndex;
00125
// Internal helper; presumably loads the block at _blockIndex into _block and
// returns false on failure -- TODO confirm.
00126 bool readCurrentBlockData();
00127
00128 public:
// Constructs an iterator over `file`. Not marked explicit, so a File lvalue
// converts implicitly to a BulkTreeIterator.
00129 BulkTreeIterator( File& file );
00130
// Constructs an iterator over `file` given a block number and a pair number,
// presumably the starting position -- TODO confirm.
00140 BulkTreeIterator( File& file, UINT64 whichBlock, int whichPair );
00141
// Iteration protocol as suggested by the names: startIteration(), then loop
// while !finished(), calling get() and nextEntry() -- TODO confirm.
00142 void startIteration();
00143 bool finished();
// get overloads: copy the current entry into caller-supplied buffers.
// keyLength/valueLength are presumably buffer capacities; keyActual and
// valueActual are output parameters. The second overload returns the key
// through a UINT32 output parameter instead of a character buffer.
00144 bool get( char* key, int keyLength, int& keyActual, char* value, int valueLength, int& valueActual );
00145 bool get( UINT32& key, char* value, int valueLength, int& valueActual );
00146 void nextEntry();
00147 };
00148
// BulkTreeReader: looks up values by key in a tree file, keeping BulkBlock
// pointers in a hash table keyed by UINT32 plus head/tail block pointers.
// Only declarations are visible here; behaviour is defined elsewhere.
00149 class BulkTreeReader {
00150 private:
// File being read, and a flag recording ownership.
// NOTE(review): _ownFile presumably means the reader created _file itself
// (e.g. in openRead) and must release it -- TODO confirm.
00151 File* _file;
00152 UINT64 _fileLength;
00153 bool _ownFile;
00154
// Block cache: _cache maps a UINT32 (presumably the block ID) to a BulkBlock*.
// NOTE(review): _head/_tail together with BulkBlock's previous/next links
// suggest a linked list used for eviction ordering -- confirm policy in the
// implementation.
00155 BulkBlock* _head;
00156 BulkBlock* _tail;
00157 indri::utility::HashTable< UINT32, BulkBlock* > _cache;
00158
// Internal helper returning the block for `id`; presumably consults _cache
// before reading from the file -- TODO confirm.
00159 BulkBlock* _fetch( UINT32 id );
00160
00161 public:
// Constructors: default, from an existing File, or from a File plus a length
// (presumably the file length to use instead of querying it). The
// single-argument form is not explicit, so a File lvalue converts implicitly.
00162 BulkTreeReader();
00163 BulkTreeReader( File& file );
00164 BulkTreeReader( File& file, UINT64 length );
00165 ~BulkTreeReader();
00166
// openRead(): takes the filename of the tree file to read.
00167 void openRead( const std::string& filename );
// get overloads: the key is a C string without a length (presumably
// NUL-terminated), an explicit (pointer, length) buffer, or a UINT32. `value`
// is a caller-supplied buffer, valueLength presumably its capacity, and
// `actual` is an output parameter, presumably the stored value's length.
00168 bool get( const char* key, char* value, int& actual, int valueLength );
00169 bool get( const char* key, int keyLength, char* value, int& actual, int valueLength );
00170 bool get( UINT32 key, char* value, int& actual, int valueLength );
00171 void close();
00172
// Returns a raw BulkTreeIterator pointer.
// NOTE(review): ownership is not visible here -- confirm whether the caller
// must delete the returned iterator.
00173 BulkTreeIterator* iterator();
00174
// Returns a raw BulkTreeIterator pointer for `key` (no length argument, so
// presumably NUL-terminated).
// NOTE(review): presumably positioned at the first entry matching `key`;
// matching rule, not-found result and ownership are not visible -- confirm.
00182 BulkTreeIterator* findFirst(const char *key);
00183 };
00184 }
00185 }
00186
00187 #endif // INDRI_BULKTREE_HPP
00188