00001 /*========================================================================== 00002 * Copyright (c) 2004 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 00013 // 00014 // TagExtent 00015 // 00016 // 4 February 2004 -- tds 00017 // 00018 00019 #ifndef INDRI_TAGEXTENT_HPP 00020 #define INDRI_TAGEXTENT_HPP 00021 00022 #include "indri/AttributeValuePair.hpp" 00023 #include <string.h> 00024 00025 namespace indri 00026 { 00027 namespace parse 00028 { 00029 00030 struct TagExtent { 00031 00032 // A comparator that sorts by end value, lowest first 00033 struct lowest_end_first { 00034 00035 bool operator() ( const indri::parse::TagExtent* x, 00036 const indri::parse::TagExtent* y ) const { 00037 00038 // returns true if x < y; false otherwise 00039 00040 if ( x->end > y->end ) return true; 00041 else if ( x->end == y->end ) return ( x < y ); 00042 else return false; 00043 } 00044 }; 00045 00046 const char* name; 00047 int begin; 00048 int end; 00049 INT64 number; 00050 TagExtent *parent; 00051 // explicit initial count of two elements. 00052 indri::utility::greedy_vector<AttributeValuePair, 2> attributes; 00053 }; 00054 00055 00056 class LessTagExtent { 00057 public: 00058 bool operator()(indri::parse::TagExtent * extent1, indri::parse::TagExtent * extent2 ) { 00059 if ( extent1->begin < extent2->begin ) 00060 return true; 00061 if ( extent1->begin == extent2->begin 00062 && extent1->end > extent2->end ) 00063 return true; 00064 if ( extent1->begin == extent2->begin 00065 && extent1->end == extent2->end ) { 00066 return (extent1 < extent2); 00067 } 00068 00069 return false; 00070 } 00071 }; 00072 00073 } 00074 } 00075 00076 #include <functional> 00077 namespace std { 00078 00079 // An STL comparator that implements first-and-longest ordering 00080 template<> 00081 struct less<indri::parse::TagExtent*> { 00082 00083 bool operator() ( const indri::parse::TagExtent* x, 00084 const indri::parse::TagExtent* y ) const { 00085 00086 // returns true if x < y; false otherwise 00087 00088 if ( x->begin < y->begin ) return true; 00089 else if ( x->begin > y->begin ) return false; 00090 else { 00091 00092 if ( ( x->end - x->begin ) > ( y->end - y->begin ) ) return true; 00093 else if ( ( x->end - x->begin ) < ( y->end - y->begin ) ) return false; 00094 else { 00095 // We might have two extents with the same names at the same locations 00096 // as a result of offset annotations that actually have different children etc. 00097 return (x < y); 00098 00099 // Two TagExtents must have same begin and end and name to be 00100 // considered equal. 00101 00102 if ( strcmp( x->name, y->name ) < 0 ) return true; 00103 else return false; 00104 } 00105 } 00106 } 00107 }; 00108 } 00109 00110 #endif // INDRI_TAGEXTENT_HPP