Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

DocExtentListMemoryBuilder.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // DocExtentListMemoryBuilder
00014 //
00015 // 24 November 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_DOCEXTENTLISTMEMORYBUILDER_HPP
00019 #define INDRI_DOCEXTENTLISTMEMORYBUILDER_HPP
00020 
00021 #include "indri/indri-platform.h"
00022 #include "indri/greedy_vector"
00023 #include "indri/DocExtentListIterator.hpp"
00024 #include <utility>
00025 #include "RVLCompress.hpp"
00026 #include "IndexTypes.hpp"
00027 
00028 namespace indri {
00029   namespace index {
00030     struct DocExtentListMemoryBuilderSegment {
00031       DocExtentListMemoryBuilderSegment( char* b, char* d, char* c ) {
00032         base = b;
00033         data = d;
00034         capacity = c;
00035       }
00036 
00037       char* base;
00038       char* data;
00039       char* capacity;
00040     };
00041 
00042     class DocExtentListMemoryBuilderIterator : public DocExtentListIterator {
00043       const indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 >* _lists;
00044       indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 >::const_iterator _current;
00045       indri::index::DocExtentListIterator::DocumentExtentData _data;
00046       
00047       const char* _list;
00048       const char* _listEnd;
00049       bool _numeric;
00050       bool _ordinal;
00051       bool _parental;
00052       bool _finished;
00053 
00054     public:
00055       void reset( const indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 >& lists, bool numeric, bool ordinal, bool parental );
00056       void reset( class DocExtentListMemoryBuilder& builder );
00057 
00058       DocExtentListMemoryBuilderIterator( const class DocExtentListMemoryBuilder& builder ); 
00059       
00060       void startIteration();
00061       bool finished() const;
00062       bool nextEntry( lemur::api::DOCID_T documentID );
00063       bool nextEntry();
00064       indri::index::DocExtentListIterator::DocumentExtentData* currentEntry();
00065     };
00066 
00067     class DocExtentListMemoryBuilder {
00068     public:
00069       typedef DocExtentListMemoryBuilderIterator iterator;
00070 
00071       int _documentFrequency;
00072       int _extentFrequency;
00073 
00074       indri::utility::greedy_vector< DocExtentListMemoryBuilderSegment, 4 > _lists;
00075 
00076       char* _list;
00077       char* _listBegin;
00078       char* _listEnd;
00079 
00080       int _lastLocation;
00081       lemur::api::DOCID_T _lastDocument;
00082       int _lastOrdinal;
00083       int _lastExtentFrequency;
00084 
00085       char* _documentPointer;
00086       char* _locationCountPointer;
00087 
00088       bool _numeric;
00089       bool _ordinal;
00090       bool _parental;
00091 
00092       inline size_t _compressedSize( lemur::api::DOCID_T documentID, int begin, int end, INT64 number, int ordinal, int parent );
00093       inline void _safeAddLocation( lemur::api::DOCID_T documentID, int begin, int end, INT64 number, int ordinal, int parent );
00094       void _growAddLocation( lemur::api::DOCID_T documentID, int begin, int end, INT64 number, int ordinal, int parent, size_t newDataSize );
00095       size_t _roundUp( size_t amount );
00096       void _grow();
00097       void _terminateDocument();
00098 
00099     public:
00100       DocExtentListMemoryBuilder( bool numeric, bool ordinal, bool parental );
00101       ~DocExtentListMemoryBuilder();
00102 
00103       void addLocation( lemur::api::DOCID_T documentID, int begin, int end, INT64 number = 0, int ordinal = 0, int parent = 0 );
00104 
00105       void clear();
00106       bool empty();
00107 
00108       int documentFrequency() const;
00109       int extentFrequency() const;
00110       size_t memorySize() const;
00111 
00112       void flush();
00113       iterator* getIterator();
00114     };
00115   }
00116 }
00117 
00118 #endif // INDRI_DOCEXTENTLISTMEMORYBUILDER_HPP

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4