00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // ListIteratorNode 00015 // 00016 // 26 January 2004 -- tds 00017 // 00018 00019 #ifndef INDRI_LISTITERATORNODE_HPP 00020 #define INDRI_LISTITERATORNODE_HPP 00021 00022 #include "indri/InferenceNetworkNode.hpp" 00023 #include "indri/Extent.hpp" 00024 #include <indri/greedy_vector> 00025 namespace indri 00026 { 00027 namespace infnet 00028 { 00029 class ListIteratorNode : public InferenceNetworkNode { 00030 protected: 00031 indri::utility::greedy_vector<indri::index::Extent> _matches; 00032 00033 // speedup for child siblings - 00034 // stores last successful position of the matches(extent) 00035 // search; linear search assuming ascending order 00036 int _lastpos; 00037 indri::index::Extent _lastExtent; 00038 00039 public: 00041 void initpointer() { 00042 _lastpos=0; 00043 } 00044 00046 void sortparent(indri::utility::greedy_vector<indri::index::Extent>& extents) { 00047 int sorted=0; 00048 int lastbegin = 0; int lastend = extents.size(); 00049 while(lastbegin<lastend){ 00050 int i=lastbegin; 00051 int end=lastend-1; 00052 lastbegin=lastend; 00053 lastend=i; 00054 for (;i<end;i++){ 00055 if (extents[i].parent > extents[i+1].parent){ 00056 indri::index::Extent x(extents[i]); 00057 extents[i]=extents[i+1];extents[i+1]=x; 00058 if(lastbegin>i)lastbegin=i; 00059 if(lastend<i+1)lastend=i+1; 00060 } 00061 } 00062 } 00063 } 00064 00066 void sortbegin(indri::utility::greedy_vector<indri::index::Extent>& extents){ 00067 int sorted=0; 00068 int lastbegin = 0; int lastend = extents.size(); 00069 while(lastbegin<lastend){ 00070 int i=lastbegin; 00071 int end=lastend-1; 00072 lastbegin=lastend; 00073 lastend=i; 00074 for (;i<end;i++){ 00075 if (extents[i].begin > extents[i+1].begin){ 00076 indri::index::Extent x(extents[i]); 00077 extents[i]=extents[i+1];extents[i+1]=x; 00078 if(lastbegin>i)lastbegin=i; 00079 if(lastend<i+1)lastend=i+1; 00080 } 00081 } 00082 } 00083 } 00084 00086 virtual void prepare( lemur::api::DOCID_T documentID ) = 0; 00087 00089 virtual const indri::utility::greedy_vector<indri::index::Extent>& extents() = 0; 00090 00092 virtual void annotate( class Annotator& annotator, lemur::api::DOCID_T documentID, indri::index::Extent &extent ) = 0; 00093 00094 // Moved work of identifying matches of an extent from ExtentRestriction to here. 00095 // This allows the computation for the list of matches for an extent to be 00096 // overridden. ExtentParent, ExtentChild, and ExtentDescendant are among the 00097 // classes that need to do this, as the parent/child/descedant relationships 00098 // are not based on extent containment - these relationships may be among 00099 // arbitrary fields in a document. 00100 virtual const indri::utility::greedy_vector<indri::index::Extent>& matches( indri::index::Extent &extent ) { 00101 int begin = extent.begin; 00102 int end = extent.end; 00103 _matches.clear(); 00104 const indri::utility::greedy_vector<indri::index::Extent>& exts = extents(); 00105 00106 // if there's no extents or we have no length - just return 00107 if (begin == end || exts.size()==0) return _matches; 00108 00109 // if we are dealing with child extents, we need to reverse the 00110 // list pointer to the last good position 00111 while((_lastpos > 0) && (exts[_lastpos-1].begin >= begin)){ 00112 _lastpos--; 00113 } 00114 00115 // now, we make sure we're in the correct position 00116 // after this loop, _lastpos->begin >= begin 00117 while((_lastpos < exts.size()) && (exts[_lastpos].begin < begin)){ 00118 _lastpos++; 00119 } 00120 00121 // for default DocListIteratorNode, any extent: begin+1 == end. 00122 while((_lastpos < exts.size()) && (exts[_lastpos].begin < end)) { 00123 if(exts[_lastpos].end <= end) { 00124 indri::index::Extent ext(exts[_lastpos]); 00125 _matches.push_back(ext); 00126 } // end if(_exts[_lastpos].end<=end) 00127 _lastpos++; 00128 } // end while(_lastpos<_exts.size()&&_exts[_lastpos].begin<end) 00129 00130 /*** 00131 *** old method of matching child extents - deprecated 00132 * 00133 * for( size_t i = 0 ; i < exts.size(); i++ ) { 00134 * if ( begin <= exts[i].begin && end >= exts[i].end ) { 00135 * _matches.push_back( exts[i] ); 00136 * } else if ( exts[i].begin > end ) { 00137 * break; 00138 * } 00139 * } 00140 **/ 00141 return _matches; 00142 } 00143 }; 00144 } 00145 } 00146 00147 #endif // INDRI_LISTNODE_HPP 00148