00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // UnorderedWindowNode 00015 // 00016 // 26 January 2004 -- tds 00017 // 11 December 2007 -- mjh - added child sort 00018 // 00019 00020 #ifndef INDRI_UNORDEREDWINDOWNODE_HPP 00021 #define INDRI_UNORDEREDWINDOWNODE_HPP 00022 00023 #include "indri/ListIteratorNode.hpp" 00024 #include <vector> 00025 #include <indri/greedy_vector> 00026 namespace indri 00027 { 00028 namespace infnet 00029 { 00030 00031 // comparison operation used in child sorting 00032 // for ascending order by # of extents in each child 00033 inline bool UWNodeChildLess(indri::infnet::ListIteratorNode* _child1, indri::infnet::ListIteratorNode* _child2) { 00034 const indri::utility::greedy_vector<indri::index::Extent>& childPositions1 = _child1->extents(); 00035 const indri::utility::greedy_vector<indri::index::Extent>& childPositions2 = _child2->extents(); 00036 return (childPositions1.size() < childPositions2.size()); 00037 } 00038 00039 class UnorderedWindowNode : public ListIteratorNode { 00040 private: 00041 struct term_position { 00042 bool operator< ( const term_position& other ) const { 00043 return begin < other.begin; 00044 } 00045 00046 double weight; 00047 int type; 00048 int begin; 00049 int end; 00050 int last; // index of previous entry of this type 00051 }; 00052 00053 int _windowSize; 00054 std::vector<ListIteratorNode*> _children; 00055 indri::utility::greedy_vector<indri::index::Extent> _extents; 00056 std::string _name; 00057 bool _childrenAlreadySorted; 00058 00059 public: 00060 UnorderedWindowNode( const std::string& name, std::vector<ListIteratorNode*>& children ); 00061 UnorderedWindowNode( const std::string& name, std::vector<ListIteratorNode*>& children, int windowSize ); 00062 lemur::api::DOCID_T nextCandidateDocument(); 00063 void indexChanged( indri::index::Index& index ); 00064 void prepare( lemur::api::DOCID_T documentID ); 00065 const indri::utility::greedy_vector<indri::index::Extent>& extents(); 00066 const std::string& getName() const; 00067 void annotate( Annotator& annotator, lemur::api::DOCID_T documentID, indri::index::Extent &extent ); 00068 }; 00069 } 00070 } 00071 00072 #endif // INDRI_UNORDEREDWINDOWNODE_HPP