00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "evallm.h"
00021 #include "idngram2lm.h"
00022 #include <stdlib.h>
00023 #include <math.h>
00024
00025 void arpa_bo_ng_prob(int context_length,
00026 id__t *sought_ngram,
00027 arpa_lm_t *arpa_ng,
00028 int verbosity,
00029 double *p_prob,
00030 int *bo_case) {
00031
00032
00033 flag found;
00034 flag found_ngram;
00035 flag found_context;
00036 flag still_found;
00037
00038 int length_exists;
00039 int ng_begin;
00040 int ng_end;
00041 int ng_middle;
00042 int *ng_index;
00043
00044 int i;
00045
00046 int temp_case;
00047
00048 double alpha;
00049 double prob;
00050
00051 alpha = 0.0;
00052
00053 ng_index = (int *) rr_malloc((context_length+1)*sizeof(int));
00054
00055 if (context_length == 0) {
00056 *p_prob = pow(10.0,arpa_ng->probs[0][sought_ngram[0]]);
00057 }
00058 else {
00059
00060 found_ngram = 0;
00061 found_context = 0;
00062
00063
00064
00065 length_exists = 0;
00066 still_found = 1;
00067
00068 while (still_found && (length_exists < (context_length+1))) {
00069
00070 found = 0;
00071
00072
00073
00074 if (length_exists == 0) {
00075
00076 if (get_full_index(arpa_ng->ind[0][sought_ngram[0]],
00077 arpa_ng->ptr_table[0],
00078 arpa_ng->ptr_table_size[0],
00079 sought_ngram[0]) <
00080 get_full_index(arpa_ng->ind[0][sought_ngram[0]+1],
00081 arpa_ng->ptr_table[0],
00082 arpa_ng->ptr_table_size[0],
00083 sought_ngram[0]+1)) {
00084 found = 1;
00085 ng_index[0] = sought_ngram[0];
00086 }
00087 }
00088 else {
00089
00090
00091
00092 ng_begin =
00093 get_full_index(arpa_ng->ind[length_exists-1][ng_index[length_exists-1]],
00094 arpa_ng->ptr_table[length_exists-1],
00095 arpa_ng->ptr_table_size[length_exists-1],
00096 ng_index[length_exists-1]);
00097
00098 if (length_exists == 1) {
00099 if (ng_index[0] < arpa_ng->vocab_size) {
00100 ng_end =
00101 get_full_index(arpa_ng->ind[length_exists-1][ng_index[length_exists-1]+1],
00102 arpa_ng->ptr_table[length_exists-1],
00103 arpa_ng->ptr_table_size[length_exists-1],
00104 ng_index[length_exists-1]+1)-1;
00105 }
00106 else {
00107 ng_end = arpa_ng->num_kgrams[1];
00108 }
00109 }
00110 else {
00111 if (ng_index[length_exists-1] <
00112 arpa_ng->num_kgrams[length_exists-1]-1) {
00113 ng_end =
00114 get_full_index(arpa_ng->ind[length_exists-1][ng_index[length_exists-1]+1],
00115 arpa_ng->ptr_table[length_exists-1],
00116 arpa_ng->ptr_table_size[length_exists-1],
00117 ng_index[length_exists-1]+1)-1;
00118 }
00119 else {
00120 ng_end = arpa_ng->num_kgrams[length_exists];
00121 }
00122 }
00123
00124 while (ng_begin <= ng_end) {
00125 ng_middle = ng_begin + ((ng_end - ng_begin) >> 1);
00126 if (sought_ngram[length_exists] <
00127 arpa_ng->word_id[length_exists][ng_middle]) {
00128 ng_end = ng_middle - 1;
00129 }
00130 else {
00131 if (sought_ngram[length_exists] >
00132 arpa_ng->word_id[length_exists][ng_middle]) {
00133 ng_begin = ng_middle + 1;
00134 }
00135 else {
00136 found = 1;
00137 ng_index[length_exists] = ng_middle;
00138 break;
00139 }
00140 }
00141 }
00142 }
00143
00144 if (found) {
00145 length_exists++;
00146 }
00147 else {
00148 still_found = 0;
00149 }
00150
00151 }
00152 if (length_exists == (context_length+1)) {
00153 found_ngram = 1;
00154 }
00155 if (length_exists >= context_length) {
00156 found_context = 1;
00157 }
00158 if (found_context) {
00159 alpha = pow(10.0,arpa_ng->bo_weight[context_length-1][ng_index[context_length-1]]);
00160 }
00161
00162 if (found_ngram) {
00163 prob = pow(10.0,arpa_ng->probs[context_length][ng_index[context_length]]);
00164 temp_case = 0;
00165 }
00166 else {
00167 arpa_bo_ng_prob(context_length-1,
00168 &sought_ngram[1],
00169 arpa_ng,
00170 verbosity,
00171 &prob,
00172 bo_case);
00173
00174 temp_case = 2;
00175 if (found_context) {
00176 prob*=alpha;
00177 temp_case=1;
00178 }
00179
00180
00181
00182 }
00183
00184
00185
00186
00187
00188
00189 *p_prob = prob;
00190 *bo_case += temp_case * (1 << (2*(context_length-1)));
00191
00192
00193 }
00194
00195 if (*p_prob > 1.0) {
00196 fprintf(stderr,"Error : P( %d | ",sought_ngram[context_length]);
00197 for (i=0;i<=context_length-1;i++) {
00198 fprintf(stderr,"%d ",sought_ngram[i]);
00199 }
00200 fprintf(stderr,") = %g\n",*p_prob);
00201 exit(1);
00202 }
00203 free(ng_index);
00204
00205 }