Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

BulkTree.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2005 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // BulkTree
00014 //
00015 // 4 March 2005 -- tds
00016 //
00017 
00018 #ifndef INDRI_BULKTREE_HPP
00019 #define INDRI_BULKTREE_HPP
00020 
00021 #include <vector>
00022 #include "indri/File.hpp"
00023 #include "indri/SequentialWriteBuffer.hpp"
00024 #include "indri/HashTable.hpp"
00025 namespace indri
00026 {
00028   namespace file
00029   {
00030     
00031     class BulkBlock {
            // One block of a bulk tree as declared here: a raw char buffer, a
            // 32-bit id, and previous/next pointers to neighbouring blocks.
            // Only declarations are visible in this header; every note below
            // about behaviour is inferred from names and signatures and should
            // be confirmed against the implementation file.
            //
            // NOTE(review): a destructor is declared and the class holds a raw
            // char*, but no copy constructor or copy assignment is declared --
            // confirm that copying a BulkBlock is safe or never happens.
00032     private:
            // Raw storage for the block contents.
00033       char* _buffer;
00034 
            // Neighbour pointers and identifier; these back the "linked list"
            // section of the public interface below.
00035       BulkBlock* _previous;
00036       BulkBlock* _next;
00037       UINT32 _id;
00038 
            // Layout helpers. All positions are plain ints; presumably they are
            // byte offsets into _buffer for the index-th key or value -- TODO
            // confirm.
00039       int _remainingCapacity();
00040       int _dataEnd();
00041       int _keyEnd( int index );
00042       int _keyStart( int index );
00043       int _valueStart( int index );
00044       int _valueEnd( int index );
00045 
            // Insertion helpers. _compare takes two pointer+length keys;
            // presumably it returns a memcmp-style ordering -- TODO confirm the
            // sign convention.
00046       bool _canInsert( int keyLength, int dataLength );
00047       void _storeKeyValueLength( int insertPoint, int keyLength, int valueLength );
00048       int _compare( const char* one, int oneLength, const char* two, int twoLength );
00049 
            // exact is an out-parameter (non-const reference). Presumably it
            // reports whether the key matched exactly and the return value is
            // an entry index -- TODO confirm.
00050       int _find( const char* key, int keyLength, bool& exact );
00051 
00052     public:
            // The leaf argument defaults to false; presumably it is the value
            // later reported by leaf() -- TODO confirm.
00053       BulkBlock( bool leaf = false );
00054       ~BulkBlock();
00055 
00056       int count();
00057       bool leaf();
00058 
            // Keys and values are passed as pointer + explicit length, so these
            // signatures do not require NUL-terminated data. The bool results
            // presumably signal success / "found" -- TODO confirm.
            //
            // Output convention for getIndex/find/findGreater: the caller
            // supplies a buffer and its length, and an int& receives the actual
            // length. Note the argument order in getIndex is (buffer, actual,
            // bufferLength), which differs from BulkTreeIterator::get, where it
            // is (buffer, bufferLength, actual).
00059       bool insert( const char* key, int keyLength, const char* data, int dataLength );
00060       bool insertFirstKey( BulkBlock& block, UINT32 blockID );
00061       bool getIndex( int index, char* key, int& keyActual, int keyLength, char* value, int& valueActual, int valueLength );
00062       bool findGreater( const char* key, int keyLength, char* value, int& actualLength, int valueBufferLength );
00063       bool find( const char* key, int keyLength, char* value, int& actualLength, int valueBufferLength );
00064 
            // The listing skips source lines 65-70 here, so the original
            // documentation for findIndexOf is not shown. Unlike find and
            // findGreater it takes no key length; presumably the key must be
            // NUL-terminated -- TODO confirm.
00071       int  findIndexOf(const char* key);
00072 
00073       void clear();
00074       char* data();
            // Static: the result cannot depend on any particular block instance.
00075       static UINT64 dataSize();
00076 
00077       // linked list
            // link() takes both neighbours at once; the pointers are raw and
            // ownership of the neighbouring blocks is not visible here.
00078       void setID( UINT32 id );
00079       UINT32 getID();
00080       void link( BulkBlock* previous, BulkBlock* next );
00081       void unlink();
00082       BulkBlock* previous();
00083       BulkBlock* next();
00084     };
00085 
00086 
00087     class BulkTreeWriter {
            // Writer side of the bulk tree. The public interface offers
            // create(), three put() overloads, three get() overloads, flush()
            // and close(). Only declarations are visible in this header; notes
            // on behaviour are inferred from names and signatures and should be
            // confirmed against the implementation file.
00088     private:
00089       UINT32 _blockID;
00090       int _flushLevel;
00091 
            // _flush takes an index; presumably an index into _blocks -- TODO
            // confirm.
00092       void _flush( int blockIndex );
00093       void _flushAll();
00094 
00095     public:
            // NOTE(review): these three data members are public even though
            // they use the underscore prefix that marks private members
            // elsewhere in this header. _blocks stores raw BulkBlock pointers;
            // who deletes them is not visible here.
00096       std::vector<BulkBlock*> _blocks;
00097       File _file;
00098       SequentialWriteBuffer _write;
00099 
00100       BulkTreeWriter();
00101       ~BulkTreeWriter();
00102 
00103       void close();
00104       void create( const std::string& filename );
00105 
            // Three ways to supply the key: a const char* with no length
            // (presumably NUL-terminated -- TODO confirm), a UINT32, or a
            // pointer with an explicit length. The value is always pointer +
            // length.
00106       void put( const char* key, const char* value, int valueLength );
00107       void put( UINT32 key, const char* value, int valueLength );
00108       void put( const char* key, int keyLength, const char* value, int valueLength );
00109 
00110       // this is a hack for now
            // Same three key forms as put(). The caller passes a value buffer
            // and its length; actual is an out-parameter that presumably
            // receives the stored value's length -- TODO confirm.
00111       bool get( const char* key, int keyLength, char* value, int& actual, int valueLength );
00112       bool get( UINT32 key, char* value, int& actual, int valueLength );
00113       bool get( const char* key, char* value, int& actual, int valueLength );
00114 
00115       void flush();
00116     };
00117 
00118     class BulkTreeIterator {
            // Cursor over the key/value pairs of a bulk tree file. Usage is via
            // startIteration(), finished(), get() and nextEntry(). Only
            // declarations are visible in this header; notes on behaviour are
            // inferred from names and signatures and should be confirmed
            // against the implementation file.
00119     private:
            // Stored as a reference, so the referenced File must outlive the
            // iterator.
00120       File& _file;
00121       UINT64 _fileLength;
            // Held by value: the iterator carries its own BulkBlock.
00122       BulkBlock _block;
            // Current position; presumably the pair index within the block
            // identified by _blockIndex -- TODO confirm.
00123       int _pairIndex;
00124       UINT64 _blockIndex;
00125 
00126       bool readCurrentBlockData();
00127 
00128     public:
            // NOTE(review): single-argument constructor is not marked explicit,
            // so a File lvalue converts implicitly to a BulkTreeIterator.
00129       BulkTreeIterator( File& file );
00130 
            // The listing skips source lines 130-139 here, so the original
            // documentation for this constructor is not shown. Presumably it
            // positions the iterator at pair whichPair of block whichBlock --
            // TODO confirm.
00140       BulkTreeIterator( File& file, UINT64 whichBlock, int whichPair );
00141 
00142       void startIteration();
00143       bool finished();
            // Output convention: (buffer, bufferLength, int& actual) for both
            // key and value. Note this order differs from BulkBlock::getIndex
            // and from BulkTreeReader::get, which place the int& actual before
            // the buffer length.
00144       bool get( char* key, int keyLength, int& keyActual, char* value, int valueLength, int& valueActual );
            // Overload that returns the key through a UINT32 reference instead
            // of a character buffer.
00145       bool get( UINT32& key, char* value, int valueLength, int& valueActual );
00146       void nextEntry();
00147     };
00148 
00149     class BulkTreeReader {
            // Reader side of the bulk tree: look up values by key with get(),
            // or obtain a BulkTreeIterator. Only declarations are visible in
            // this header; notes on behaviour are inferred from names and
            // signatures and should be confirmed against the implementation
            // file.
00150     private:
            // _file is a pointer paired with an _ownFile flag; presumably the
            // flag records whether this reader is responsible for closing or
            // deleting the File -- TODO confirm.
00151       File* _file;
00152       UINT64 _fileLength;
00153       bool _ownFile;
00154   
            // _head/_tail are BulkBlock pointers and _cache maps a UINT32 to a
            // BulkBlock*; _fetch takes the same id type. Presumably this is a
            // cache of blocks keyed by block id, chained through the blocks'
            // own previous/next links -- TODO confirm the eviction policy.
00155       BulkBlock* _head;
00156       BulkBlock* _tail;
00157       indri::utility::HashTable< UINT32, BulkBlock* > _cache;
00158 
00159       BulkBlock* _fetch( UINT32 id );
00160 
00161     public:
            // Three ways to construct: with no file, from an existing File, or
            // from an existing File plus an explicit length.
            // NOTE(review): the single-argument constructor is not marked
            // explicit, so a File lvalue converts implicitly to a
            // BulkTreeReader.
00162       BulkTreeReader();
00163       BulkTreeReader( File& file );
00164       BulkTreeReader( File& file, UINT64 length );
00165       ~BulkTreeReader();
00166   
00167       void openRead( const std::string& filename );
            // Key may be a const char* with no length (presumably
            // NUL-terminated -- TODO confirm), a pointer with explicit length,
            // or a UINT32. The caller passes a value buffer and its length;
            // actual is an out-parameter that presumably receives the stored
            // value's length -- TODO confirm.
00168       bool get( const char* key, char* value, int& actual, int valueLength );
00169       bool get( const char* key, int keyLength, char* value, int& actual, int valueLength );
00170       bool get( UINT32 key, char* value, int& actual, int valueLength );
00171       void close();
00172 
            // Returns a raw pointer; ownership is not visible in this header.
            // Presumably the caller must delete the iterator -- TODO confirm.
00173       BulkTreeIterator* iterator();
00174 
            // The listing skips source lines 174-181 here, so the original
            // documentation for findFirst is not shown. It takes a key with no
            // length and returns a raw iterator pointer; the same ownership
            // question as iterator() applies.
00182       BulkTreeIterator* findFirst(const char *key);
00183     };
00184   }
00185 }
00186 
00187 #endif // INDRI_BULKTREE_HPP
00188 

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4