Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

arabic_stemmer.h

Go to the documentation of this file.
00001 
00002 /**************************************************************************/
00003 /**************************************************************************/
00004 /**************            ARABIC STEMMER HEADER FILE         *************/
00005 /**************************************************************************/
00006 /**************************************************************************/
00007 
00008 /*
00009 
00010         Copyright (c) 2001 UMASS CIIR All rights reserved.
00011         Written by Nick Dufresne (nickd@cs.brandeis.edu)
00012         
00013         
00014 */
00015 
00016 
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <stdlib.h>
00020 #include <ctype.h>
00021 #include <malloc.h>
00022 
00023 void arabic_remove_diacritics (char *, char *);
00024 // Removes diacritics from a word. NOTE(review): the parameters are unnamed, so which argument is the input and which receives the result is not visible in this header -- confirm against the implementation.
00025 
00026 void arabic_stop(char *, char *);  // only removes stops (presumably stop words -- confirm); no other processing is described
00027 void no_stem(char *, char *) ;     // does not do anything; NOTE(review): confirm whether the word is still copied to the second argument
00028 
00029 // Normalize an Arabic word. NOTE(review): this comment presumably covers the four declarations below; how the norm2 and light10 variants differ, and what the _stop suffix adds (presumably stop removal, cf. arabic_stop), is not visible in this header -- confirm against the implementation.
00030 void arabic_norm2(char *, char *);
00031 void arabic_norm2_stop(char *, char *);
00032 void arabic_light10(char *, char *);   
00033 void arabic_light10_stop(char *, char *);   
00034 
00035 // Use this to clean up the hash files created to hold the word list.
00036 void arabic_clean_up (void);
00037 
00038 
00039 /******* set_stemmer support **********/
00040 
00041 typedef struct {   // describes one stemmer choice (presumably one entry per stemmer selectable through set_stemmer -- confirm)
00042      char *option;   // presumably the name used to select this stemmer -- confirm
00043      char *description;   // presumably human-readable text shown by show_stemmer_options -- confirm
00044      void (*stem_fct)(char *, char *) ;   // stemmer function; same signature as the (char *, char *) stemmers declared in this header
00045 } stem_info_t;
00046 
00047 void show_stemmer_options(void) ;   // takes no arguments; declared with (void) so this is a true prototype and calls with stray arguments are rejected. NOTE(review): presumably lists the available stemmer options -- confirm
00048 void *set_stemmer(char *) ;   // NOTE(review): returns an untyped pointer; presumably the stemmer function matching the given option string, suitable for passing to stem_phrase -- confirm
00049 char *stem_phrase(char *phrase, int *numtoks, void (*stemmer)(char*,char*)) ;   // NOTE(review): presumably applies stemmer to each token of phrase and reports the token count through numtoks; ownership of the returned string is not visible here -- confirm
00050 
00051 #define NUMSTEMMERS 6   // NOTE(review): presumably must equal the number of stemmers registered in arabic_stem.c -- confirm when adding one
00052 #define ARABIC_BREAKS  " \t\r\n~`!@#$%^&*()_-+=[]{}|\\:;\"'<>,.?/\xA1\xF7\xD7\xBA\xBF"   // space, tab, CR, LF, the ASCII punctuation characters, and the bytes 0xA1 0xF7 0xD7 0xBA 0xBF; presumably the token delimiter set, with the high bytes being punctuation in an 8-bit Arabic code page (e.g. CP1256) -- confirm
00053 // Edit arabic_stem.c to add stemmers and make them available to other programs.
00054 

Generated on Wed Nov 3 12:58:51 2004 for Lemur Toolkit by doxygen1.2.18