00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "evallm.h"
00028 #include <stdio.h>
00029 #include <string.h>
00030
00031 fb_info *gen_fb_list(sih_t *vocab_ht,
00032 int vocab_size,
00033 char **vocab,
00034 flag *context_cue,
00035 flag backoff_from_unk_inc,
00036 flag backoff_from_unk_exc,
00037 flag backoff_from_ccs_inc,
00038 flag backoff_from_ccs_exc,
00039 char *fb_list_filename) {
00040
00041 fb_info *fb_list;
00042 int i;
00043 FILE *fb_list_file;
00044 char current_fb_word[500];
00045 char inc_or_exc[500];
00046 int current_fb_id;
00047 char wlist_entry[1024];
00048
00049 fb_list = (fb_info *) rr_calloc(vocab_size+1,sizeof(fb_info));
00050
00051 if (backoff_from_unk_inc) {
00052 fb_list[0].backed_off = 1;
00053 fb_list[0].inclusive = 1;
00054 }
00055
00056 if (backoff_from_unk_exc) {
00057 fb_list[0].backed_off = 1;
00058 fb_list[0].inclusive = 0;
00059 }
00060
00061 if (backoff_from_ccs_inc || backoff_from_ccs_exc) {
00062 for (i=0;i<=vocab_size;i++) {
00063 if (context_cue[i]) {
00064 fb_list[i].backed_off = 1;
00065 if (backoff_from_ccs_inc) {
00066 fb_list[i].inclusive = 1;
00067 }
00068 else {
00069 fb_list[i].inclusive = 0;
00070 }
00071 }
00072 }
00073 }
00074
00075 if (strcmp(fb_list_filename,"")) {
00076 fb_list_file = rr_iopen(fb_list_filename);
00077 while (fgets (wlist_entry, sizeof (wlist_entry),fb_list_file)) {
00078 if (strncmp(wlist_entry,"##",2)==0) continue;
00079 sscanf (wlist_entry, "%s %s",current_fb_word,inc_or_exc);
00080 if (strncmp(wlist_entry,"#",1)==0) {
00081 fprintf(stderr,"\n\n===========================================================\n");
00082 fprintf(stderr,":\nWARNING: line assumed NOT a comment:\n");
00083 fprintf(stderr, ">>> %s <<<\n",wlist_entry);
00084 fprintf(stderr, " '%s' will be included in the forced backoff list\n",current_fb_word);
00085 fprintf(stderr, " (comments must start with '##')\n");
00086 fprintf(stderr,"===========================================================\n\n");
00087 }
00088
00089
00090 if (sih_lookup(vocab_ht,current_fb_word,¤t_fb_id) == 0) {
00091 fprintf(stderr,"Error : %s in the forced backoff list does not appear in the vocabulary.",current_fb_word);
00092 }
00093
00094
00095
00096 if (inc_or_exc[0] == 'i' || inc_or_exc[0] == 'I') {
00097 fb_list[current_fb_id].inclusive = 1;
00098 fb_list[current_fb_id].backed_off = 1;
00099 }
00100 else {
00101 if (inc_or_exc[0] == 'e' || inc_or_exc[0] == 'E') {
00102 fb_list[current_fb_id].inclusive = 0;
00103 fb_list[current_fb_id].backed_off = 1;
00104 }
00105 else {
00106 fprintf(stderr,"Error in line of forced back-off list file.\nLine is : %s\n",wlist_entry);
00107 }
00108 }
00109 }
00110 rr_iclose(fb_list_file);
00111 }
00112
00113 return (fb_list);
00114 }
00115
00116