Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

SequentialReadBuffer.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // SequentialReadBuffer
00014 //
00015 // 10 December 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_SEQUENTIALREADBUFFER_HPP
00019 #define INDRI_SEQUENTIALREADBUFFER_HPP
00020 
00021 #include "indri/indri-platform.h"
00022 #include "indri/File.hpp"
00023 #include "indri/InternalFileBuffer.hpp"
00024 #include "Exception.hpp"
00025 
00026 namespace indri
00027 {
00028   namespace file
00029   {
00030     
00031     class SequentialReadBuffer {
00032     private:
00033       File& _file;
00034       UINT64 _position;
00035       InternalFileBuffer _current;
00036 
00037     public:
00038       SequentialReadBuffer( File& file ) :
00039         _file(file),
00040         _position(0),
00041         _current( 1024*1024 )
00042       {
00043       }
00044 
00045       SequentialReadBuffer( File& file, size_t length ) :
00046         _file(file),
00047         _position(0),
00048         _current( length )
00049       {
00050       }
00051 
00052       void cache( UINT64 position, size_t length ) {
00053         _current.buffer.clear();
00054         _current.filePosition = position;
00055         _current.buffer.grow( length );
00056 
00057         size_t actual = _file.read( _current.buffer.write( length ), _position, length );
00058         _current.buffer.unwrite( length - actual );
00059       }
00060 
00061       size_t read( void* buffer, UINT64 position, size_t length ) {
00062         if( position >= _current.filePosition && (position + length) <= _current.filePosition + _current.buffer.position() ) {
00063           memcpy( buffer, _current.buffer.front() + position - _current.filePosition, length );
00064           return length;
00065         } else {
00066           seek(position);
00067           return read( buffer, length );
00068         }
00069       }
00070 
00071       size_t read( void* buffer, size_t length ) {
00072         memcpy( buffer, read( length ), length );
00073         return length;
00074       }
00075 
00076       const void* peek( size_t length ) {
00077         const void* result = 0;
00078       
00079         if( _position < _current.filePosition || (_position + length) > _current.filePosition + _current.buffer.position() ) {
00080           // data isn't in the current buffer
00081           // this isn't necessarily the most efficient way to do this, but it should work
00082           cache( _position, std::max( length, _current.buffer.size() ) );
00083           // if we get a short read
00084           if ( _current.buffer.position() + _current.filePosition < _position + length ) {
00085             LEMUR_THROW(LEMUR_IO_ERROR, "read fewer bytes than expected.");
00086           }
00087           
00088         }
00089 
00090         result = _current.buffer.front() + ( _position - _current.filePosition );
00091         assert( _current.filePosition <= _position );
00092         assert( _current.buffer.position() + _current.filePosition >= _position + length );
00093         return result;
00094       }
00095 
00096       const void* read( size_t length ) {
00097         const void* result = peek( length );
00098         _position += length;
00099         return result;
00100       }
00101 
00102       void seek( UINT64 position ) {
00103         _position = position;
00104       }
00105 
00106       void clear() {
00107         _position = 0;
00108         _current.filePosition = 0;
00109         _current.buffer.clear();
00110       }
00111 
00112       UINT64 position() {
00113         return _position;
00114       }
00115     };
00116   
00117   }
00118 }
00119 
00120 #endif // INDRI_SEQUENTIALREADBUFFER_HPP
00121 

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4