Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TextTokenizer.cpp File Reference

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <ctype.h>
#include "indri/TextTokenizer.hpp"
#include "indri/TermExtent.hpp"
#include "indri/TagEvent.hpp"
#include "indri/TokenizedDocument.hpp"
#include "indri/UnparsedDocument.hpp"
#include "indri/UTF8Transcoder.hpp"
#include "indri/AttributeValuePair.hpp"

Classes

struct  yy_buffer_state
struct  yy_trans_info

Defines

#define YY_INT_ALIGNED   short int
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION   2
#define YY_FLEX_MINOR_VERSION   5
#define YY_FLEX_SUBMINOR_VERSION   33
#define FLEX_BETA
#define INT8_MIN   (-128)
#define INT16_MIN   (-32767-1)
#define INT32_MIN   (-2147483647-1)
#define INT8_MAX   (127)
#define INT16_MAX   (32767)
#define INT32_MAX   (2147483647)
#define UINT8_MAX   (255U)
#define UINT16_MAX   (65535U)
#define UINT32_MAX   (4294967295U)
#define yyconst
#define YY_NULL   0
#define YY_SC_TO_UI(c)   ((unsigned int) (unsigned char) c)
#define BEGIN   (yy_start) = 1 + 2 *
#define YY_START   (((yy_start) - 1) / 2)
#define YYSTATE   YY_START
#define YY_STATE_EOF(state)   (YY_END_OF_BUFFER + state + 1)
#define YY_NEW_FILE   tokrestart(tokin )
#define YY_END_OF_BUFFER_CHAR   0
#define YY_BUF_SIZE   16384
#define YY_STATE_BUF_SIZE   ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
#define EOB_ACT_CONTINUE_SCAN   0
#define EOB_ACT_END_OF_FILE   1
#define EOB_ACT_LAST_MATCH   2
#define YY_LESS_LINENO(n)
#define yyless(n)
#define unput(c)   yyunput( c, (yytext_ptr) )
#define YY_BUFFER_NEW   0
#define YY_BUFFER_NORMAL   1
#define YY_BUFFER_EOF_PENDING   2
#define YY_CURRENT_BUFFER
#define YY_CURRENT_BUFFER_LVALUE   (yy_buffer_stack)[(yy_buffer_stack_top)]
#define YY_FLUSH_BUFFER   tok_flush_buffer(YY_CURRENT_BUFFER )
#define yy_new_buffer   tok_create_buffer
#define yy_set_interactive(is_interactive)
#define yy_set_bol(at_bol)
#define YY_AT_BOL()   (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
#define tokwrap(n)   1
#define YY_SKIP_YYWRAP
#define yytext_ptr   toktext
#define YY_DO_BEFORE_ACTION
#define YY_NUM_RULES   14
#define YY_END_OF_BUFFER   15
#define REJECT   reject_used_but_not_detected
#define yymore()   yymore_used_but_not_detected
#define YY_MORE_ADJ   0
#define YY_RESTORE_YY_MORE_OFFSET
#define ZAP   1
#define TAG   2
#define ASCII_TOKEN   3
#define UTF8_TOKEN   4
#define INITIAL   0
#define COMMENT   1
#define YY_EXTRA_TYPE   void *
#define YY_READ_BUF_SIZE   8192
#define ECHO   (void) fwrite( toktext, tokleng, 1, tokout )
#define YY_INPUT(buf, result, max_size)
#define yyterminate()   return YY_NULL
#define YY_START_STACK_INCR   25
#define YY_FATAL_ERROR(msg)   yy_fatal_error( msg )
#define YY_DECL_IS_OURS   1
#define YY_DECL   int toklex (void)
#define YY_BREAK   break;
#define YY_RULE_SETUP   YY_USER_ACTION
#define YY_EXIT_FAILURE   2
#define yyless(n)
#define YYTABLES_NAME   "yytables"

Typedefs

typedef signed char flex_int8_t
typedef short int flex_int16_t
typedef int flex_int32_t
typedef unsigned char flex_uint8_t
typedef unsigned short int flex_uint16_t
typedef unsigned int flex_uint32_t
typedef yy_buffer_state * YY_BUFFER_STATE
typedef unsigned int yy_size_t
typedef unsigned char YY_CHAR
typedef int yy_state_type

Functions

void tokrestart (FILE *input_file)
void tok_switch_to_buffer (YY_BUFFER_STATE new_buffer)
YY_BUFFER_STATE tok_create_buffer (FILE *file, int size)
void tok_delete_buffer (YY_BUFFER_STATE b)
void tok_flush_buffer (YY_BUFFER_STATE b)
void tokpush_buffer_state (YY_BUFFER_STATE new_buffer)
void tokpop_buffer_state (void)
void tokensure_buffer_stack (void)
void tok_load_buffer_state (void)
void tok_init_buffer (YY_BUFFER_STATE b, FILE *file)
YY_BUFFER_STATE tok_scan_buffer (char *base, yy_size_t size)
YY_BUFFER_STATE tok_scan_string (yyconst char *yy_str)
YY_BUFFER_STATE tok_scan_bytes (yyconst char *bytes, int len)
void * tokalloc (yy_size_t)
void * tokrealloc (void *, yy_size_t)
void tokfree (void *)
yy_state_type yy_get_previous_state (void)
yy_state_type yy_try_NUL_trans (yy_state_type current_state)
int yy_get_next_buffer (void)
void yy_fatal_error (yyconst char msg[])
int yy_init_globals (void)
void yyunput (int c, char *buf_ptr)
int input (void)
int toklex (void)

Variables

int tokleng
FILE * tokin = (FILE *) 0, *tokout = (FILE *) 0
FILE * tokout
size_t yy_buffer_stack_top = 0
size_t yy_buffer_stack_max = 0
YY_BUFFER_STATE * yy_buffer_stack = 0
char yy_hold_char
int yy_n_chars
char * yy_c_buf_p = (char *) 0
int yy_init = 0
int yy_start = 0
int yy_did_buffer_switch_on_eof
int toklineno = 1
char * toktext
yyconst flex_int16_t yy_nxt [][256]
yyconst flex_int16_t yy_accept [68]
yy_state_type yy_last_accepting_state
char * yy_last_accepting_cpos
yyconst yy_state_type yy_NUL_trans [68]
int tok_flex_debug = 0
long byte_position

Define Documentation

#define ASCII_TOKEN   3
 

#define BEGIN   (yy_start) = 1 + 2 *
 

#define COMMENT   1
 

#define ECHO   (void) fwrite( toktext, tokleng, 1, tokout )
 

#define EOB_ACT_CONTINUE_SCAN   0
 

#define EOB_ACT_END_OF_FILE   1
 

#define EOB_ACT_LAST_MATCH   2
 

#define FLEX_BETA
 

#define FLEX_SCANNER
 

#define INITIAL   0
 

#define INT16_MAX   (32767)
 

#define INT16_MIN   (-32767-1)
 

#define INT32_MAX   (2147483647)
 

#define INT32_MIN   (-2147483647-1)
 

#define INT8_MAX   (127)
 

#define INT8_MIN   (-128)
 

#define REJECT   reject_used_but_not_detected
 

#define TAG   2
 

#define tokwrap(n)   1
 

#define UINT16_MAX   (65535U)
 

#define UINT32_MAX   (4294967295U)
 

#define UINT8_MAX   (255U)
 

#define unput(c)   yyunput( c, (yytext_ptr) )
 

#define UTF8_TOKEN   4
 

 
#define YY_AT_BOL()   (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
 

#define YY_BREAK   break;
 

#define YY_BUF_SIZE   16384
 

#define YY_BUFFER_EOF_PENDING   2
 

#define YY_BUFFER_NEW   0
 

#define YY_BUFFER_NORMAL   1
 

#define YY_CURRENT_BUFFER
 

Value:

( (yy_buffer_stack) \
                          ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
                          : NULL)

#define YY_CURRENT_BUFFER_LVALUE   (yy_buffer_stack)[(yy_buffer_stack_top)]
 

#define YY_DECL   int toklex (void)
 

#define YY_DECL_IS_OURS   1
 

#define YY_DO_BEFORE_ACTION
 

Value:

(yytext_ptr) = yy_bp; \
        tokleng = (size_t) (yy_cp - yy_bp); \
        (yy_hold_char) = *yy_cp; \
        *yy_cp = '\0'; \
        (yy_c_buf_p) = yy_cp;

#define YY_END_OF_BUFFER   15
 

#define YY_END_OF_BUFFER_CHAR   0
 

#define YY_EXIT_FAILURE   2
 

#define YY_EXTRA_TYPE   void *
 

#define YY_FATAL_ERROR(msg)   yy_fatal_error( msg )
 

#define YY_FLEX_MAJOR_VERSION   2
 

#define YY_FLEX_MINOR_VERSION   5
 

#define YY_FLEX_SUBMINOR_VERSION   33
 

#define YY_FLUSH_BUFFER   tok_flush_buffer(YY_CURRENT_BUFFER )
 

#define YY_INPUT(buf, result, max_size)
 

Value:

if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
                { \
                int c = '*'; \
                size_t n; \
                for ( n = 0; n < max_size && \
                             (c = getc( tokin )) != EOF && c != '\n'; ++n ) \
                        buf[n] = (char) c; \
                if ( c == '\n' ) \
                        buf[n++] = (char) c; \
                if ( c == EOF && ferror( tokin ) ) \
                        YY_FATAL_ERROR( "input in flex scanner failed" ); \
                result = n; \
                } \
        else \
                { \
                errno=0; \
                while ( (result = fread(buf, 1, max_size, tokin))==0 && ferror(tokin)) \
                        { \
                        if( errno != EINTR) \
                                { \
                                YY_FATAL_ERROR( "input in flex scanner failed" ); \
                                break; \
                                } \
                        errno=0; \
                        clearerr(tokin); \
                        } \
                }\
\

#define YY_INT_ALIGNED   short int
 

#define YY_LESS_LINENO(n)
 

#define YY_MORE_ADJ   0
 

#define yy_new_buffer   tok_create_buffer
 

#define YY_NEW_FILE   tokrestart(tokin )
 

#define YY_NULL   0
 

#define YY_NUM_RULES   14
 

#define YY_READ_BUF_SIZE   8192
 

#define YY_RESTORE_YY_MORE_OFFSET
 

#define YY_RULE_SETUP   YY_USER_ACTION
 

#define YY_SC_TO_UI(c)   ((unsigned int) (unsigned char) c)
 

#define yy_set_bol(at_bol)
 

Value:

{ \
        if ( ! YY_CURRENT_BUFFER ){\
        tokensure_buffer_stack (); \
                YY_CURRENT_BUFFER_LVALUE =    \
            tok_create_buffer(tokin,YY_BUF_SIZE ); \
        } \
        YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
        }

#define yy_set_interactive(is_interactive)
 

Value:

{ \
        if ( ! YY_CURRENT_BUFFER ){ \
        tokensure_buffer_stack (); \
                YY_CURRENT_BUFFER_LVALUE =    \
            tok_create_buffer(tokin,YY_BUF_SIZE ); \
        } \
        YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
        }

#define YY_SKIP_YYWRAP
 

#define YY_START   (((yy_start) - 1) / 2)
 

#define YY_START_STACK_INCR   25
 

#define YY_STATE_BUF_SIZE   ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
 

#define YY_STATE_EOF(state)   (YY_END_OF_BUFFER + state + 1)
 

#define yyconst
 

#define yyless(n)
 

Value:

do \
                { \
                  \
        int yyless_macro_arg = (n); \
        YY_LESS_LINENO(yyless_macro_arg);\
                toktext[tokleng] = (yy_hold_char); \
                (yy_c_buf_p) = toktext + yyless_macro_arg; \
                (yy_hold_char) = *(yy_c_buf_p); \
                *(yy_c_buf_p) = '\0'; \
                tokleng = yyless_macro_arg; \
                } \
        while ( 0 )

#define yyless(n)
 

Value:

do \
                { \
                  \
        int yyless_macro_arg = (n); \
        YY_LESS_LINENO(yyless_macro_arg);\
                *yy_cp = (yy_hold_char); \
                YY_RESTORE_YY_MORE_OFFSET \
                (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
                YY_DO_BEFORE_ACTION;   \
                } \
        while ( 0 )

 
#define yymore()   yymore_used_but_not_detected
 

#define YYSTATE   YY_START
 

#define YYTABLES_NAME   "yytables"
 

 
#define yyterminate()   return YY_NULL
 

#define yytext_ptr   toktext
 

#define ZAP   1
 


Typedef Documentation

typedef short int flex_int16_t
 

typedef int flex_int32_t
 

typedef signed char flex_int8_t
 

typedef unsigned short int flex_uint16_t
 

typedef unsigned int flex_uint32_t
 

typedef unsigned char flex_uint8_t
 

typedef struct yy_buffer_state* YY_BUFFER_STATE
 

typedef unsigned char YY_CHAR
 

typedef unsigned int yy_size_t
 

typedef int yy_state_type
 


Function Documentation

int input (void) [static]
 

YY_BUFFER_STATE tok_create_buffer (FILE *file, int size)
 

void tok_delete_buffer (YY_BUFFER_STATE b)
 

void tok_flush_buffer (YY_BUFFER_STATE b)
 

void tok_init_buffer (YY_BUFFER_STATE b, FILE *file) [static]
 

void tok_load_buffer_state (void) [static]
 

YY_BUFFER_STATE tok_scan_buffer (char *base, yy_size_t size)
 

YY_BUFFER_STATE tok_scan_bytes (yyconst char *bytes, int len)
 

YY_BUFFER_STATE tok_scan_string (yyconst char *yy_str)
 

void tok_switch_to_buffer (YY_BUFFER_STATE new_buffer)
 

void * tokalloc (yy_size_t)
 

void tokensure_buffer_stack (void) [static]
 

void tokfree (void *)
 

int toklex (void)
 

void tokpop_buffer_state (void)
 

void tokpush_buffer_state (YY_BUFFER_STATE new_buffer)
 

void * tokrealloc (void *, yy_size_t)
 

void tokrestart (FILE *input_file)
 

void yy_fatal_error (yyconst char msg[]) [static]
 

int yy_get_next_buffer (void) [static]
 

yy_state_type yy_get_previous_state (void) [static]
 

int yy_init_globals (void) [static]
 

yy_state_type yy_try_NUL_trans (yy_state_type current_state) [static]
 

void yyunput (int c, char *buf_ptr) [static]
 


Variable Documentation

long byte_position [static]
 

int tok_flex_debug = 0
 

FILE * tokin = (FILE *) 0, *tokout = (FILE *) 0
 

int tokleng
 

int toklineno = 1
 

FILE * tokout
 

char * toktext
 

yyconst flex_int16_t yy_accept[68] [static]
 

Initial value:

    {   0,
        0,    0,    0,    0,   15,   13,   12,   13,    9,   13,
        9,   13,    9,   11,    2,    2,    2,    2,    4,    2,
        2,    2,    2,    0,    0,    9,   10,    8,    9,   11,
        0,    0,    2,    2,    2,    2,    0,    2,    2,    2,
        2,    2,    0,    0,    7,    0,    0,    0,    0,    5,
        0,    6,    2,    2,    2,    3,    2,    2,    2,    2,
        2,    0,   10,    8,    1,    2,    2
    }

YY_BUFFER_STATE* yy_buffer_stack = 0 [static]
 

Stack as an array.

size_t yy_buffer_stack_max = 0 [static]
 

capacity of stack.

size_t yy_buffer_stack_top = 0 [static]
 

index of top of stack.

char* yy_c_buf_p = (char *) 0 [static]
 

int yy_did_buffer_switch_on_eof [static]
 

char yy_hold_char [static]
 

int yy_init = 0 [static]
 

char* yy_last_accepting_cpos [static]
 

yy_state_type yy_last_accepting_state [static]
 

int yy_n_chars [static]
 

yyconst yy_state_type yy_NUL_trans[68] [static]
 

Initial value:

    {   0,
        6,    6,   15,   15,    0,    0,    0,    0,    0,    0,
        0,    0,    0,    0,   33,   33,   33,   33,    0,   33,
       33,   33,   33,    0,    0,    0,    0,    0,    0,    0,
       48,   51,   33,   33,   33,   33,    0,   33,   33,   33,
       58,   60,    0,    0,    0,    0,    0,   48,   48,    0,
       51,    0,   33,   33,   33,    0,   33,   58,   33,   60,
       33,    0,    0,    0,   48,   33,   33
    }

yyconst flex_int16_t yy_nxt[][256] [static]
 

int yy_start = 0 [static]
 


Generated on Tue Jun 15 11:02:57 2010 for Lemur by doxygen 1.3.4