00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef INDRI_DOCUMENTSTRUCTURE_HPP
00013 #define INDRI_DOCUMENTSTRUCTURE_HPP
00014
00015 #include "indri/FieldExtent.hpp"
00016 #include "indri/greedy_vector"
00017 #include <sstream>
00018 #include <vector>
00019 #include <set>
00020
00021 typedef struct ninf {
00022 int id;
00023 int parent;
00024 int type;
00025
00026 int begin;
00027 int end;
00028
00029 UINT64 number;
00030
00031 int length;
00032 int totalLength;
00033
00034 int numChildren;
00035 int children;
00036
00037 } nodeinfo_t;
00038
00039
00040 namespace indri {
00041 namespace index {
00042
00043 class Index;
00044
00045 class DocumentStructure {
00046
00047 public:
00048
00049 DocumentStructure(Index & index, const indri::utility::greedy_vector<indri::index::FieldExtent> & fields)
00050 : _index( &index ) ,
00051 _numNodes( 0 ),
00052 _nodes( ),
00053 _childrenBuff( )
00054 {
00055 loadStructure(fields);
00056 }
00057
00058 DocumentStructure(Index & index) :
00059 _index( &index ) ,
00060 _numNodes( 0 ),
00061 _nodes( ),
00062 _childrenBuff( )
00063 {
00064 }
00065
00066 DocumentStructure() :
00067 _index( 0 ) ,
00068 _numNodes( 0 ),
00069 _nodes( ),
00070 _childrenBuff( )
00071 {
00072 }
00073
00074 void loadStructure(const indri::utility::greedy_vector<indri::index::FieldExtent> & fields);
00075 void setIndex( indri::index::Index & index );
00076 indri::index::Index * getIndex() { return _index; }
00077
00078 ~DocumentStructure();
00079
00080 int parent(int node);
00081 int length(int node);
00082 int begin(int node);
00083 int end(int node);
00084 int accumulatedLength(int node);
00085 int type(int node);
00086 UINT64 number(int node);
00087
00088 typedef indri::utility::greedy_vector<int>::iterator child_iterator;
00089
00090
00091 child_iterator childrenBegin( int node );
00092
00093
00094 child_iterator childrenEnd( int node );
00095
00096
00097 int nodeCount() { return _numNodes; }
00098
00099
00100
00101 int findLeaf(int begin, int end, int root = -1);
00102
00103
00104
00105
00106 std::set<int> * findLeafs(int begin, int end, bool exact = false);
00107 void findLeafs(std::set<int> * leafs, int begin, int end, bool exact = false);
00108
00109
00110
00111 bool ancestor(int node, int possibleAncestor);
00112
00113 std::string path(int node);
00114
00115 int fieldId( const std::string path );
00116
00117
00118 indri::utility::greedy_vector<int> & topDownOrder();
00119 void topDownOrder(std::set<int> & roots, indri::utility::greedy_vector<int> & order);
00120
00121 protected:
00122
00123 int _numNodes;
00124
00125 indri::utility::greedy_vector<nodeinfo_t> _nodes;
00126 indri::utility::greedy_vector<int> _childrenBuff;
00127
00128 Index * _index;
00129 void _constructNodePath(std::stringstream & path, int node);
00130
00131 indri::utility::greedy_vector<int> _topDownOrder;
00132
00133 void _computeTopDownOrder();
00134
00135
00136
00137 bool _findLeafs(int node, int begin, int end, bool exact, std::set<int> * leafs);
00138
00139 };
00140
00141 }
00142 }
00143 #endif
00144