Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

InvFPTypes.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 /* type definitions for objects we will use */
00014 #ifndef _INVFPTYPES_H
00015 #define _INVFPTYPES_H
00016 
00017 #include "common_headers.hpp"
00018 #include "IndexTypes.hpp"
00019 #include <cstring>
00020 namespace lemur 
00021 {
00022   namespace index
00023   {
00024     
00025 #define IND_VERSION "4.12"  // version string; presumably the value recorded under VERSION_PAR - confirm against the index writer
00026 
00027     // Suffixes for filenames, one macro per kind of file. These are preprocessor macros, so they are NOT scoped by the enclosing namespaces. (Presumably appended to an index base name - confirm against the code that builds the filenames.)
00028 #define INVINDEX  ".invf"
00029 #define INVFPINDEX ".invfp"
00030 #define INVLOOKUP  ".invlookup"
00031 #define DTINDEX  ".dt"
00032 #define DTLOOKUP  ".dtlookup"
00033 #define TERMIDMAP  ".tid"
00034 #define TERMIDSTRMAP ".tidstr"
00035 #define DOCIDMAP  ".did"
00036 #define DOCIDSTRMAP ".didstr"
00037 #define MAINTOC  ".inv"
00038 #define INVFPTOC ".ifp"
00039 #define DOCMGRMAP ".dm"
00040 
00041     // Placeholder string to use for out-of-vocabulary (OOV) ids.
00042 #define INVALID_STR "[OOV]"
00043 
00044     // Names of parameters, as string keys. (Presumably the keys read from / written to the index table-of-contents file - confirm against the reader/writer.) Like all macros, these are not scoped by the enclosing namespaces.
00045 #define VERSION_PAR "VERSION"
00046 #define NUMDOCS_PAR "NUM_DOCS"
00047 #define NUMTERMS_PAR "NUM_TERMS"
00048 #define NUMUTERMS_PAR "NUM_UNIQUE_TERMS"
00049 #define AVEDOCLEN_PAR "AVE_DOCLEN"
00050 #define INVINDEX_PAR  "INV_INDEX"
00051 #define INVLOOKUP_PAR  "INV_LOOKUP"
00052 #define DTINDEX_PAR  "DT_INDEX"
00053 #define DTLOOKUP_PAR  "DT_LOOKUP"
00054 #define TERMIDMAP_PAR  "TERMIDS"
00055 #define TERMIDSTRMAP_PAR "TERMIDSTRS"
00056 #define DOCIDMAP_PAR  "DOCIDS"
00057 #define DOCIDSTRMAP_PAR "DOCIDSTRS"
00058 #define NUMDT_PAR  "NUM_DTFILES"
00059 #define NUMINV_PAR  "NUM_INVFILES"
00060 #define DOCMGR_PAR  "DOCMGR_IDS"
00061 
00062     struct LocatedTerm { // A term id paired with exactly one location value.
00063       lemur::api::TERMID_T term;  // id of the term
00064       lemur::api::LOC_T loc;      // one location for that term (presumably a position within a document - confirm)
00065     };
00066 
00067     struct LLTerm { // A term id paired with a list of location values.
00068       lemur::api::TERMID_T term;  // id of the term
00069       vector<lemur::api::LOC_T> loc;  // the locations for that term; NOTE(review): `vector` is unqualified, so this relies on a using-declaration from an included header (presumably common_headers.hpp - confirm)
00070     };
00071 
00072     struct dt_entry {   // an entry in the lookup table for docterm lists index
00073       lemur::api::FILEID_T fileid;  // which file the entry's list is in (original note said "the word"; for a docterm entry this is presumably per document - confirm)
00074       long offset;        // what the offset into the file is; NOTE(review): the width of `long` is platform-dependent
00075       int length;         // the length of the list (original note said "inverted list"; units - bytes vs. elements - are not visible here)
00076       int docmgr;         // the docmgr id of manager for this doc
00077     };
00078 
00079     struct inv_entry {   // an entry in a lookup table; NOTE(review): the original comment said "docterm lists index", identical to dt_entry - the name and the ctf/df fields suggest the inverted lists index instead; confirm
00080       lemur::api::FILEID_T fileid;  // which file the word is in
00081       long offset;        // what the offset into the file is; NOTE(review): the width of `long` is platform-dependent
00082       lemur::api::COUNT_T ctf;            // collection term freq
00083       lemur::api::COUNT_T df;             // doc freq
00084     };
00085 
/// "Less than" functor over NUL-terminated C strings.
///
/// Orders two strings by std::strcmp(), i.e. by unsigned-byte lexicographic
/// order, which makes it usable as the comparison object of an ordered
/// container keyed by C-string pointers (it compares contents, not addresses).
///
/// The parameters are `const char*` because the strings are only read: this
/// accepts `const char*` keys and string literals as well as the `char*`
/// arguments existing callers pass (`char*` converts implicitly).
///
/// Preconditions: both pointers are non-null and point to NUL-terminated data.
struct ltstr
{
  /// @param s1 left-hand string
  /// @param s2 right-hand string
  /// @return true when s1 sorts strictly before s2, false otherwise
  ///         (including when the contents are equal).
  bool operator()(const char* s1, const char* s2) const {
    // <cstring> only guarantees the std:: qualified name.
    return std::strcmp(s1, s2) < 0;
  }
};
00092   }
00093 }
00094 
00095 #endif

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4