00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef _EVALLM_PROTS_
00022 #define _EVALLM_PROTS_
00023
00024 #include "pc_libs/pc_general.h"
00025 #include "rr_libs/general.h"
00026 #include "ngram.h"
00027 #include "toolkit.h"
00028
00029
00030
/* Pair of flags describing one entry of the structure that gen_fb_list()
 * returns (as fb_info *) and that calc_prob_of() accepts as fb_list.
 * NOTE(review): the field meanings below are inferred from the field names
 * and from the backoff_from_*_inc / backoff_from_*_exc parameters declared
 * in this header -- confirm against the implementation. */
00039 typedef struct {
00040 flag backed_off; /* presumably set when a forced back-off applies to this entry -- TODO confirm */
00041 flag inclusive; /* presumably distinguishes inclusive from exclusive forced back-off -- TODO confirm */
00042 } fb_info;
00043
/* Element type of the bo_weight tables in arpa_lm_t (single-precision float). */
00044 typedef float bo_t;
/* Element type of the probs tables in arpa_lm_t (single-precision float). */
00045 typedef float prob_t;
00046
/* In-memory language-model structure passed to load_arpa_lm(),
 * display_arpa_stats(), arpa_bo_ng_prob() and the evaluation routines
 * declared in this header.
 * NOTE(review): only the field names and types are visible here; every
 * per-field description below is inferred from the name and must be
 * confirmed against the code that fills in and reads this structure.
 * Field order and types are left untouched. */
00053 typedef struct {
00054
00055 unsigned short n; /* presumably the n-gram order of the model -- TODO confirm */
00057
00058
00059 sih_t *vocab_ht; /* presumably a hash table over the vocabulary (sih_t comes from an included header) */
00060 unsigned short vocab_size; /* presumably the number of vocabulary entries; note gen_fb_list() takes its vocab_size as int */
00061 char **vocab; /* presumably the vocabulary words as C strings, indexed by word id -- TODO confirm */
00062 flag *context_cue; /* presumably one flag per word id marking context cues -- TODO confirm */
00064 int no_of_ccs; /* presumably the count of context cues ("ccs") -- TODO confirm */
00066
00067
00068 table_size_t *table_sizes; /* presumably one size per n-gram level -- TODO confirm */
00069 id__t **word_id; /* presumably per-level arrays of word ids -- TODO confirm */
00071 bo_t **bo_weight; /* presumably per-level arrays of back-off weights -- TODO confirm */
00072 prob_t **probs; /* presumably per-level arrays of probabilities -- TODO confirm */
00073 index__t **ind; /* presumably per-level arrays of indices into the next level -- TODO confirm */
00075
00076
00077 ptr_tab_t **ptr_table; /* presumably per-level lookup tables used together with ind -- TODO confirm */
00079 unsigned short *ptr_table_size; /* presumably the size of each ptr_table entry -- TODO confirm */
00081
00082
00083 int *num_kgrams; /* presumably the number of k-grams stored for each k -- TODO confirm */
00087 unsigned short vocab_type; /* vocabulary type code; valid values are not visible in this header */
00089 unsigned short first_id; /* presumably the lowest word id in use -- TODO confirm */
00092 } arpa_lm_t;
00093
00094
00095
/* Prototype only; the definition is not visible in this header.
 * Takes a level k, an index ind and an ng_t model, and returns an
 * unsigned short.
 * NOTE(review): from the name, this looks like a count of distinct word
 * types associated with entry `ind` at level `k` -- confirm against the
 * implementation. */
00096 unsigned short num_of_types(int k,
00097 int ind,
00098 ng_t *ng);
/* Prototype only; the definition is not visible in this header.
 * Takes a back-off case code, a context length and an open FILE stream.
 * NOTE(review): presumably writes a readable description of bo_case to
 * annotation_fp -- confirm against the implementation. */
00099 void decode_bo_case(int bo_case,
00100 int context_length,
00101 FILE *annotation_fp);
00102
/* Prototype only. Takes an ng_t model; presumably reports summary
 * statistics for it (output destination not visible here -- TODO confirm). */
00103 void display_stats(ng_t *ng);
00104
/* Prototype only. Counterpart of display_stats() that takes an arpa_lm_t
 * model; presumably reports summary statistics for it -- TODO confirm. */
00105 void display_arpa_stats(arpa_lm_t *arpa_ng);
00106
/* Prototype only. Takes a caller-supplied ng_t and a file name;
 * presumably reads the language model stored in lm_filename into *ng.
 * Returns nothing, so any error reporting is not visible from this
 * header -- TODO confirm how failures are handled. */
00107 void load_lm(ng_t *ng,
00108 char *lm_filename);
00109
/* Prototype only. Counterpart of load_lm() that takes a caller-supplied
 * arpa_lm_t; presumably reads the model stored in lm_filename into
 * *arpa_lm. Returns nothing -- TODO confirm how failures are handled. */
00111 void load_arpa_lm(arpa_lm_t *arpa_lm,
00112 char *lm_filename
00113 );
00114
00115
/* Prototype only. Takes an input line plus two output parameters.
 * NOTE(review): presumably splits input_line into arguments, storing the
 * count in *num_of_args and the argument strings in args. Whether
 * input_line is modified, and who owns the strings placed in args, is not
 * visible here -- confirm against the implementation. */
00116 void parse_comline(char *input_line,
00117 int *num_of_args,
00118 char **args);
00119
/* Prototype only; the definition is not visible in this header.
 * Accepts both model representations (ng and arpa_ng) together with the
 * arpa_lm flag, which presumably selects which of the two is used.
 * The *_filename parameters name the text stream to evaluate and several
 * auxiliary files (probabilities, annotations, OOVs, forced back-off list);
 * whether each is read or written is not visible here.
 * The four backoff_from_* flags have the same names as the parameters of
 * gen_fb_list() and validate().
 * NOTE(review): include_unks and log_base presumably control whether
 * unknown words are counted and the logarithm base used for reporting --
 * confirm against the implementation. */
00120 void compute_perplexity(ng_t *ng,
00121 arpa_lm_t *arpa_ng,
00122 char *text_stream_filename,
00123 char *probs_stream_filename,
00124 char *annotation_filename,
00125 char *oov_filename,
00126 char *fb_list_filename,
00127 flag backoff_from_unk_inc,
00128 flag backoff_from_unk_exc,
00129 flag backoff_from_ccs_inc,
00130 flag backoff_from_ccs_exc,
00131 flag arpa_lm,
00132 flag include_unks,
00133 double log_base);
00134
/* Prototype only; the definition is not visible in this header.
 * Returns a pointer to fb_info, built from the vocabulary arguments, the
 * four backoff_from_* flags and fb_list_filename.
 * NOTE(review): presumably the result is an array with one fb_info per
 * vocabulary entry, and the caller owns it -- neither the length nor the
 * ownership is visible here; confirm against the implementation.
 * Note that vocab_size is an int here, while arpa_lm_t stores its
 * vocab_size as unsigned short. */
00135 fb_info *gen_fb_list(sih_t *vocab_ht,
00136 int vocab_size,
00137 char **vocab,
00138 flag *context_cue,
00139 flag backoff_from_unk_inc,
00140 flag backoff_from_unk_exc,
00141 flag backoff_from_ccs_inc,
00142 flag backoff_from_ccs_exc,
00143 char *fb_list_filename);
00144
/* Prototype only; the definition is not visible in this header.
 * Accepts both model representations plus the arpa_lm flag (presumably
 * selecting which one is used), an array of word strings, the four
 * backoff_from_* flags and fb_list_filename.
 * NOTE(review): what is validated, the expected length of `words`, and
 * how the result is reported are not visible here -- confirm against the
 * implementation. */
00145 void validate(ng_t *ng,
00146 arpa_lm_t *arpa_ng,
00147 char **words,
00148 flag backoff_from_unk_inc,
00149 flag backoff_from_unk_exc,
00150 flag backoff_from_ccs_inc,
00151 flag backoff_from_ccs_exc,
00152 flag arpa_lm,
00153 char *fb_list_filename);
00154
/* Prototype only; the definition is not visible in this header.
 * Returns a double for sought_word given a context of context_length
 * word ids; presumably the model probability of the word in that context.
 * Accepts both model representations plus the arpa_lm flag (presumably
 * selecting which one is used) and an fb_info pointer such as the one
 * returned by gen_fb_list().
 * bo_case and actual_context_length are pointers and so are presumably
 * outputs describing the back-off taken and the context length actually
 * used -- TODO confirm against the implementation. */
00155 double calc_prob_of(id__t sought_word,
00156 id__t *context,
00157 int context_length,
00158 ng_t *ng,
00159 arpa_lm_t *arpa_ng,
00160 fb_info *fb_list,
00161 int *bo_case,
00162 int *actual_context_length,
00163 flag arpa_lm);
00164
/* Prototype only; the definition is not visible in this header.
 * Operates on an arpa_lm_t model and returns nothing directly; p_prob and
 * bo_case are pointers and so are presumably outputs (the computed
 * probability and the back-off case).
 * NOTE(review): sought_ngram presumably holds the context word ids
 * followed by the word being scored, i.e. context_length + 1 ids -- the
 * required array length is not visible here; confirm against the
 * implementation. */
00165 void arpa_bo_ng_prob(int context_length,
00166 id__t *sought_ngram,
00167 arpa_lm_t *arpa_ng,
00168 int verbosity,
00169 double *p_prob,
00170 int *bo_case);
00171
00172
00173 #endif