00001 /*========================================================================== 00002 * Copyright (c) 2000-2004 Carnegie Mellon University. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software (and below), and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #include "TextHandler.hpp" 00013 #include "PushIndex.hpp" 00014 #include "InvFPTermList.hpp" 00015 #include "Parser.hpp" 00016 #include "WordSet.hpp" 00017 00018 #include <stdio.h> 00019 00020 #ifndef _CTFINDEXER_HPP 00021 #define _CTFINDEXER_HPP 00022 00023 namespace lemur 00024 { 00025 namespace distrib 00026 { 00027 00028 class CtfIndexer : public lemur::api::TextHandler { 00029 00030 public: 00031 CtfIndexer(const string &csName, int bufferSize, 00032 bool countStopWords = false); 00033 ~CtfIndexer(); 00034 00035 char * handleWord(char * word); 00036 00037 void newDb(const string &name); 00038 00039 private: 00040 int ctfCount; 00041 bool first; 00042 lemur::index::PushIndex * collsel; 00043 lemur::parse::DocumentProps * csdp; 00044 lemur::index::InvFPTerm * term; 00045 bool countStopWds; 00046 }; 00047 } 00048 } 00049 00050 #endif