00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef INDRI_SNIPPETBUILDER_HPP
00013 #define INDRI_SNIPPETBUILDER_HPP
00014
00015 #include <vector>
00016 #include <string>
00017 #include "indri/QueryAnnotation.hpp"
00018 #include "indri/ParsedDocument.hpp"
00019
00020 namespace indri {
00021 namespace api {
00022 class SnippetBuilder {
00023 private:
00024 bool _HTMLOutput;
00025
00026 public:
00027 struct Region {
00028 int begin;
00029 int end;
00030
00031 std::vector<indri::index::Extent> extents;
00032 };
00033
00034 private:
00035 void _getRawNodes( std::vector<std::string>& nodeNames, const indri::api::QueryAnnotationNode* node );
00036
00037 std::vector< std::pair<indri::index::Extent, int> > _documentMatches( int document,
00038 const std::map< std::string, std::vector<indri::api::ScoredExtentResult> >& annotations,
00039 const std::vector<std::string>& nodeNames );
00040
00041 std::vector<Region> _buildRegions(
00042 std::vector< std::pair<indri::index::Extent, int> >& extents,
00043 int positionCount, int matchWidth, int windowWidth );
00044
00045 Region _bestRegion( const std::vector< std::pair<indri::index::Extent, int> >& extents,
00046 const std::vector< indri::api::SnippetBuilder::Region >& skipRegions,
00047 int positionCount, int windowWidth );
00048
00049 char* _sanitizeText( const char* text, int begin, int length );
00050
00051 void _addEllipsis( std::string& snippet );
00052 void _addHighlightedRegion( std::string& snippet, char* region );
00053 void _addUnhighlightedRegion( std::string& snippet, char* region );
00054 void _completeSnippet( std::string& snippet );
00055
00056 public:
00057 SnippetBuilder( bool html ) : _HTMLOutput(html) {}
00058 std::string build( int documentID, const indri::api::ParsedDocument* document, indri::api::QueryAnnotation* annotation );
00059 };
00060 }
00061 }
00062
00063 #endif // INDRI_SNIPPETBUILDER_HPP