Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

DiskTermData.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // DiskTermData
00014 //
00015 // 13 December 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_DISKTERMDATA_HPP
00019 #define INDRI_DISKTERMDATA_HPP
00020 
00021 #include "indri/TermData.hpp"
00022 #include "Keyfile.hpp"
00023 #include "IndexTypes.hpp"
00024 
00025 namespace indri {
00026   namespace index {
00027     struct DiskTermData {
00028       enum {
00029         WithOffsets = 0x01,
00030         WithString = 0x02,
00031         WithTermID = 0x04
00032       };
00033 
00034       TermData* termData;
00035       lemur::api::TERMID_T termID;
00036 
00037       UINT64 startOffset;
00038       UINT64 length;
00039     };
00040   }
00041 }
00042 
00043 
00044 //
00045 // disktermdata_decompress
00046 //
00047 
00048 inline void disktermdata_compress( indri::utility::RVLCompressStream& stream, indri::index::DiskTermData* diskData, int fieldCount, int mode ) {
00049   ::termdata_compress( stream, diskData->termData, fieldCount );
00050 
00051   if( mode & indri::index::DiskTermData::WithTermID ) {
00052     stream << diskData->termID;
00053   }
00054 
00055   if( mode & indri::index::DiskTermData::WithString ) {
00056     stream << diskData->termData->term;
00057   }
00058 
00059   if( mode & indri::index::DiskTermData::WithOffsets ) {
00060     stream << diskData->startOffset
00061            << diskData->length;
00062   }
00063 }
00064 
00065 //
00066 // disktermdata_decompress
00067 //
00068 
00069 inline indri::index::DiskTermData* disktermdata_decompress( indri::utility::RVLDecompressStream& stream, void* buffer, int fieldCount, int mode ) {
00070   indri::index::DiskTermData* diskData = (indri::index::DiskTermData*) buffer;
00071 
00072   int termDataSize = ::termdata_size( fieldCount );
00073   char* termLocation = (char*)buffer + sizeof(indri::index::DiskTermData) + termDataSize;
00074   indri::index::TermData* termDataLocation = (indri::index::TermData*) ((char*)buffer + sizeof(indri::index::DiskTermData));
00075 
00076   diskData->termData = termDataLocation;
00077   diskData->termData->term = termLocation;
00078 
00079   // set first byte of string to zero
00080   termLocation[0] = 0;
00081 
00082   ::termdata_decompress( stream, diskData->termData, fieldCount );
00083 
00084   if( mode & indri::index::DiskTermData::WithTermID ) {
00085     stream >> diskData->termID;
00086   } else {
00087     diskData->termID = 0;
00088   }
00089 
00090   if( mode & indri::index::DiskTermData::WithString ) {
00091     stream >> termLocation;
00092   }
00093 
00094   if( mode & indri::index::DiskTermData::WithOffsets ) {
00095     stream >> diskData->startOffset
00096            >> diskData->length;
00097   } else {
00098     diskData->startOffset = 0;
00099     diskData->length = 0;
00100   }
00101 
00102   return diskData;
00103 }
00104 
00105 //
00106 // disktermdata_size
00107 //
00108 
00109 inline int disktermdata_size( int fieldCount ) {
00110   // how much space are we going to need?
00111   int termDataSize = ::termdata_size( fieldCount );
00112   int totalSize = termDataSize + (lemur::file::Keyfile::MAX_KEY_LENGTH+2) + sizeof(indri::index::DiskTermData);
00113 
00114   return totalSize;
00115 }
00116 
00117 //
00118 // disktermdata_create
00119 //
00120 
00121 inline indri::index::DiskTermData* disktermdata_create( int fieldCount ) {
00122   char* dataBlock = (char*) malloc( disktermdata_size( fieldCount ) );
00123 
00124   indri::index::DiskTermData* diskTermData = (indri::index::DiskTermData*) dataBlock;
00125 
00126   diskTermData->termData = (indri::index::TermData*) (dataBlock +
00127                                                       sizeof (indri::index::DiskTermData));
00128   termdata_construct( diskTermData->termData, fieldCount );
00129 
00130   diskTermData->termData->term = dataBlock +
00131     sizeof (indri::index::DiskTermData) +
00132     termdata_size( fieldCount );
00133   const_cast<char*>(diskTermData->termData->term)[0] = 0;
00134 
00135   return diskTermData;
00136 }
00137 
00138 //
00139 // disktermdata_decompress
00140 //
00141 
00142 inline indri::index::DiskTermData* disktermdata_decompress( indri::utility::RVLDecompressStream& stream, int fieldCount, int mode ) {
00143   // how much space are we going to need?
00144   int totalSize = disktermdata_size( fieldCount );
00145 
00146   return ::disktermdata_decompress( stream, malloc( totalSize ), fieldCount, mode );
00147 }
00148 
00149 //
00150 // disktermdata_delete
00151 //
00152 
00153 inline void disktermdata_delete( indri::index::DiskTermData* diskData ) {
00154   free( diskData );
00155 }
00156 
00157 #endif // INDRI_DISKTERMDATA_HPP
00158 
00159 

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4