Main Page | Namespace List | Class Hierarchy | Alphabetical List | Compound List | File List | Namespace Members | Compound Members | File Members | Related Pages

arabic_stem.cpp File Reference

#include "arabic_stemmer.h"
#include "WordSet.hpp"


Defines
#define	STEM_TO_WORD 99
#define	STEM_WORD 0
#define	WAW "0xe6"
#define	CHAR_WAW 0xe6
Functions
void	light_stem (char *, char *)
int	remove_diacritics (char *, char *)
void	remove_definite_articles (char *, char *)
char *	substring (const char *, int, int)
void	freeWordSets ()
void	substring_copy (char dest[], const char *word, int start, int end)
int	Str_equals (const char *s1, const char *s2)
int	is_whitespace (const char c)
void	load_static_files (const char *path)
void	check_stemmer_files ()
void	remove_all_suffixes (char *word, char *result, size_t lenlimit)
void	arabic_clean_up (void)
void	no_stem (char *word, char *result)
int	on_stop_list (char *word)
void	arabic_stop (char *word, char *result)
void	arabic_norm2 (char *word, char *result)
void	arabic_norm2_stop (char *word, char *result)
void	arabic_light10 (char *word, char *result)
void	arabic_light10_stop (char *word, char *result)
void	show_stemmer_options ()
void *	set_stemmer (char *stemval)
char *	stem_phrase (char *phrase, int *numtoks, void(*stemmer)(char *, char *))
Variables
char *	defarticles [] = {"ال", "وال","بال", "كال", "فال", "لل", "\0"}
char *	suffixes [] = {"ها","ان","ات","ون","ين","يه","ية","ه","ة","ي","\0"}
stem_info_t	stemtable [NUMSTEMMERS]
int	files_loaded = 0
const int	isWhitespace [256]
const int	NormChar [256]
const int	Norm3Char [256]
const int	ArabicVowel [256]
WordSet *	stop_words_ht
char *	arabic_stemdir

Define Documentation

#define CHAR_WAW 0xe6

#define STEM_TO_WORD 99

#define STEM_WORD 0

#define WAW "0xe6"

Function Documentation

void arabic_clean_up ( void )

void arabic_light10 ( char * word,

char * result

)

void arabic_light10_stop ( char * word,

char * result

)

void arabic_norm2 ( char * word,

char * result

)

void arabic_norm2_stop ( char * word,

char * result

)

void arabic_stop ( char * word,

char * result

)

void check_stemmer_files ( )

void freeWordSets ( )

int is_whitespace ( const char c )

void light_stem ( char * ,

char *

)

void load_static_files ( const char * path )

void no_stem ( char * word,

char * result

)

int on_stop_list ( char * word )

void remove_all_suffixes ( char * word,

char * result,

size_t lenlimit

)

void remove_definite_articles ( char * ,

char *

)

int remove_diacritics ( char * ,

char *

)

void* set_stemmer ( char * stemval )

void show_stemmer_options ( )

char* stem_phrase ( char * phrase,

int * numtoks,

void(* stemmer)(char *, char *)

)

int Str_equals ( const char * s1,

const char * s2

)

char * substring ( const char * ,

int ,

int

)

void substring_copy ( char dest[],

const char * word,

int start,

int end

)

Variable Documentation

char* arabic_stemdir

const int ArabicVowel[256]

Initial value:
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0xe6,0,0,0,0,0,0xec,0xed,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

char* defarticles[] = {"ال", "وال","بال", "كال", "فال", "لل", "\0"}

int files_loaded = 0

const int isWhitespace[256]

Initial value:
{ 0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

const int Norm3Char[256]

Initial value:
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f, 0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f, 0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xc0,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf, 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf, 0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

const int NormChar[256]

Initial value:
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f, 0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f, 0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xc0,0xc1,0xc7,0xc7,0xc4,0xc7,0xc6,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf, 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 0,0xd8,0xd9,0xda,0xdb, 0,0xdd,0xde,0xdf, 0,0xe1, 0,0xe3,0xe4,0xe5,0xe6, 0, 0, 0, 0, 0,0xed, 0xed, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

stem_info_t stemtable[NUMSTEMMERS]

Initial value:
{ {"none", "none", no_stem}, {"arabic_stop", "arabic_stop", arabic_stop}, {"arabic_norm2", "table normalization", arabic_norm2}, {"arabic_norm2_stop", "table normalization with stopping", arabic_norm2_stop}, {"arabic_light10", "light stemming", arabic_light10}, {"arabic_light10_stop", "light10 and remove stop words", arabic_light10_stop} }

WordSet* stop_words_ht

char* suffixes[] = {"ها","ان","ات","ون","ين","يه","ية","ه","ة","ي","\0"}

Generated on Wed Nov 3 12:59:08 2004 for Lemur Toolkit by Doxygen 1.2.18

arabic_stem.cpp File Reference

Defines

Functions

Variables

Define Documentation

Function Documentation

Variable Documentation