00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // KeyfileDocumentData 00015 // 00016 // 4 January 2004 -- tds 00017 // 00018 00019 #ifndef INDRI_KEYFILEDOCUMENTDATA_HPP 00020 #define INDRI_KEYFILEDOCUMENTDATA_HPP 00021 00022 #include "indri/indri-platform.h" 00023 00024 namespace indri { 00025 namespace index { 00026 struct DocumentData { 00027 DocumentData() : offset(0), byteLength(0), indexedLength(0), uniqueTermCount(0) {} 00028 00029 UINT64 offset; // offset into the dt file where we'll find the TermList 00030 int byteLength; // length in bytes of the TermList 00031 int indexedLength; // the length of the document without stopwords 00032 int totalLength; // the length of the document including stopwords (used for scoring) 00033 int uniqueTermCount; // number of unique terms found in this document 00034 }; 00035 } 00036 } 00037 00038 #endif // INDRI_KEYFILEDOCUMENTDATA_HPP 00039 00040