Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

SnippetBuilder.hpp

Go to the documentation of this file.
00001 
00002 
00003 //
00004 // SnippetBuilder
00005 //
00006 // This code is based largely on the code I wrote for the PHP and Java
00007 // interfaces.
00008 //
00009 // 17 July 2006 -- tds
00010 //            
00011 
00012 #ifndef INDRI_SNIPPETBUILDER_HPP
00013 #define INDRI_SNIPPETBUILDER_HPP
00014 
00015 #include <vector>
00016 #include <string>
00017 #include "indri/QueryAnnotation.hpp"
00018 #include "indri/ParsedDocument.hpp"
00019 
00020 namespace indri {
00021   namespace api {
00022     class SnippetBuilder {  // Declares the snippet-building interface; every member function except the constructor is only declared here and defined outside this header.
00023     private:
00024       bool _HTMLOutput;  // Set from the constructor's `html` argument; presumably selects HTML vs. plain-text snippet markup -- confirm in the implementation.
00025       
00026     public:
00027       struct Region {  // A begin/end pair together with a list of extents.
00028         int begin;  // Start of the region; units and inclusivity are not visible in this header -- TODO confirm.
00029         int end;  // End of the region; units and inclusivity are not visible in this header -- TODO confirm.
00030         
00031         std::vector<indri::index::Extent> extents;  // Extents attached to this region; presumably the matches falling inside it -- verify against the implementation.
00032       };
00033 
00034     private:  
00035       void _getRawNodes( std::vector<std::string>& nodeNames, const indri::api::QueryAnnotationNode* node );  // `nodeNames` is a non-const reference, so it is presumably an output list filled from `node` -- confirm.
00036      
00037       std::vector< std::pair<indri::index::Extent, int> > _documentMatches( int document,  // Returns (Extent, int) pairs for `document`; the meaning of the int is not visible here -- TODO confirm.
00038                                                                             const std::map< std::string, std::vector<indri::api::ScoredExtentResult> >& annotations,  // Read-only map from a string key to scored extents.
00039                                                                             const std::vector<std::string>& nodeNames );  // NOTE(review): std::map/std::pair are used but <map>/<utility> are not included directly; presumably pulled in by the indri headers.
00040       
00041       std::vector<Region> _buildRegions(  // Returns a list of Regions built from the arguments below.
00042                                         std::vector< std::pair<indri::index::Extent, int> >& extents,  // NOTE(review): taken by non-const reference, so the implementation may modify or reorder it -- confirm.
00043                                         int positionCount, int matchWidth, int windowWidth );  // Units of these three ints are not visible here; presumably token positions -- TODO confirm.
00044 
00045       Region _bestRegion( const std::vector< std::pair<indri::index::Extent, int> >& extents,  // Returns a single Region chosen from `extents` (read-only); selection criteria live in the implementation.
00046                           const std::vector< indri::api::SnippetBuilder::Region >& skipRegions,  // Read-only; presumably regions already chosen that must not be picked again -- confirm.
00047                           int positionCount, int windowWidth );
00048 
00049       char* _sanitizeText( const char* text, int begin, int length );  // NOTE(review): returns a raw char*; who owns/frees the buffer is not visible in this header -- confirm.
00050 
00051       void _addEllipsis( std::string& snippet );  // Mutates `snippet` in place; presumably appends an ellipsis marker -- confirm.
00052       void _addHighlightedRegion( std::string& snippet, char* region );  // Mutates `snippet` in place using `region`; `region` is a non-const char*, so it may also be modified or freed -- confirm.
00053       void _addUnhighlightedRegion( std::string& snippet, char* region );  // Unhighlighted counterpart of _addHighlightedRegion with the same signature; same ownership question for `region`.
00054       void _completeSnippet( std::string& snippet );  // Mutates `snippet` in place; presumably finalizes the output -- confirm.
00055       
00056     public:  
00057       SnippetBuilder( bool html ) : _HTMLOutput(html) {}  // Stores `html` in _HTMLOutput. NOTE(review): not marked explicit, so a bool converts implicitly to SnippetBuilder.
00058       std::string build( int documentID, const indri::api::ParsedDocument* document, indri::api::QueryAnnotation* annotation );  // Public entry point; returns the snippet as a std::string. Null handling for the two pointers is not visible here -- TODO confirm.
00059     };
00060   }  // namespace api
00061 }  // namespace indri
00062 
00063 #endif // INDRI_SNIPPETBUILDER_HPP

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4