00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <stdlib.h>
00020 #include <ctype.h>
00021 #include <malloc.h>
00022 /* Diacritic removal routine; takes two char pointers and returns nothing. NOTE(review): presumably (input word, output buffer) -- confirm against the definition. */
00023 void arabic_remove_diacritics (char *, char *);
00024 /* Functions with the void (char *, char *) signature that stem_info_t.stem_fct can point to. */
00025 /* NOTE(review): argument roles (input vs. output) and buffer-size requirements are not visible in this header -- confirm. */
00026 void arabic_stop(char *, char *);   /* NOTE(review): presumably stopword handling without stemming -- confirm */
00027 void no_stem(char *, char *) ;   /* NOTE(review): presumably passes the word through unchanged -- confirm */
00028 /* "norm2" and "light10" routines, each declared in a plain and a "_stop" variant; all share the void (char *, char *) signature. */
00029 /* NOTE(review): the "_stop" variants presumably add stopword removal on top of the plain variant -- confirm in the implementation. */
00030 void arabic_norm2(char *, char *);
00031 void arabic_norm2_stop(char *, char *);
00032 void arabic_light10(char *, char *);
00033 void arabic_light10_stop(char *, char *);
00034
00035 /* Takes no arguments and returns nothing. NOTE(review): presumably releases resources held by the arabic_* routines -- confirm. */
00036 void arabic_clean_up (void);
00037
00038
00039 /* Record pairing two strings (an option and a description) with a */
00040 /* pointer to a function of type void (char *, char *). */
00041 typedef struct {
00042 char *option;   /* NOTE(review): presumably the name matched by set_stemmer() -- confirm */
00043 char *description;   /* NOTE(review): presumably the text listed by show_stemmer_options() -- confirm */
00044 void (*stem_fct)(char *, char *) ;   /* same signature as the arabic_* / no_stem functions declared in this header */
00045 } stem_info_t;
00046 /* Entry points for listing stemmers, selecting one, and applying one to a phrase. NOTE(review): descriptions below are inferred from names -- confirm against the definitions. */
00047 void show_stemmer_options() ;   /* empty parentheses: before C23 this leaves the parameters unspecified rather than declaring (void) */
00048 void *set_stemmer(char *) ;   /* returns void *; NOTE(review): presumably a stemmer function pointer to pass as stem_phrase()'s third argument -- confirm */
00049 char *stem_phrase(char *phrase, int *numtoks, void (*stemmer)(char*,char*)) ;   /* NOTE(review): presumably writes a token count through numtoks; ownership of the returned string is not visible here -- confirm */
00050 /* NUMSTEMMERS: NOTE(review): presumably the number of stem_info_t entries; this header declares six candidates (arabic_stop, no_stem, arabic_norm2, arabic_norm2_stop, arabic_light10, arabic_light10_stop) -- confirm. */
00051 #define NUMSTEMMERS 6
00052 #define ARABIC_BREAKS " \t\r\n~`!@#$%^&*()_-+=[]{}|\\:;\"'<>,.?/\xA1\xF7\xD7\xBA\xBF"   /* whitespace, ASCII punctuation, then five bytes >= 0x80 */
00053 /* NOTE(review): the five high bytes presumably assume Windows-1256 text (0xA1 Arabic comma, 0xF7 division sign, 0xD7 multiplication sign, 0xBA Arabic semicolon, 0xBF Arabic question mark) -- confirm the input encoding. */
00054