Main Page   Compound List   File List   Compound Members   File Members  

arpa_bo_ng_prob.c

Go to the documentation of this file.
00001 /*=====================================================================
00002                 =======   COPYRIGHT NOTICE   =======
00003 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00004 Ronald Rosenfeld and Philip Clarkson.
00005 
00006 All rights reserved.
00007 
00008 This software is made available for research purposes only.  It may be
00009 redistributed freely for this purpose, in full or in part, provided
00010 that this entire copyright notice is included on any copies of this
00011 software and applications and derivations thereof.
00012 
00013 This software is provided on an "as is" basis, without warranty of any
00014 kind, either expressed or implied, as to any matter including, but not
00015 limited to warranty of fitness of purpose, or merchantability, or
00016 results obtained from use of this software.
00017 ======================================================================*/
00018 
00019 
00020 #include "evallm.h"
00021 #include "idngram2lm.h"
00022 #include <stdlib.h>
00023 #include <math.h>
00024 
00025 void arpa_bo_ng_prob(int context_length,
00026                      id__t *sought_ngram,
00027                      arpa_lm_t *arpa_ng,
00028                      int verbosity,
00029                      double *p_prob,
00030                      int *bo_case) {
00031 
00032   
00033   flag found;
00034   flag found_ngram;
00035   flag found_context;
00036   flag still_found;
00037 
00038   int length_exists;
00039   int ng_begin;
00040   int ng_end;
00041   int ng_middle;
00042   int *ng_index;
00043 
00044   int i;
00045   
00046   int temp_case;
00047 
00048   double alpha;
00049   double prob;
00050 
00051   alpha = 0.0; /* To give no warnings at compilation time */
00052 
00053   ng_index = (int *) rr_malloc((context_length+1)*sizeof(int));
00054 
00055   if (context_length == 0) {
00056     *p_prob = pow(10.0,arpa_ng->probs[0][sought_ngram[0]]);
00057   }
00058   else {
00059 
00060     found_ngram = 0;
00061     found_context = 0;
00062 
00063     /* Find the appropriate (context-length+1)-gram */
00064 
00065     length_exists = 0;
00066     still_found = 1;
00067 
00068     while (still_found && (length_exists < (context_length+1))) {
00069       
00070       found = 0;
00071 
00072       /* Look for (length_exists+1)-gram */
00073 
00074       if (length_exists == 0) {
00075 
00076         if (get_full_index(arpa_ng->ind[0][sought_ngram[0]],
00077                            arpa_ng->ptr_table[0],
00078                            arpa_ng->ptr_table_size[0],
00079                            sought_ngram[0]) <
00080             get_full_index(arpa_ng->ind[0][sought_ngram[0]+1],
00081                            arpa_ng->ptr_table[0],
00082                            arpa_ng->ptr_table_size[0],
00083                            sought_ngram[0]+1)) {
00084            found = 1;
00085           ng_index[0] = sought_ngram[0];
00086         }
00087       }
00088       else {
00089 
00090         /* Binary search for right ngram */
00091 
00092         ng_begin = 
00093           get_full_index(arpa_ng->ind[length_exists-1][ng_index[length_exists-1]],
00094                          arpa_ng->ptr_table[length_exists-1],
00095                          arpa_ng->ptr_table_size[length_exists-1],
00096                          ng_index[length_exists-1]);
00097 
00098         if (length_exists == 1) {
00099           if (ng_index[0] < arpa_ng->vocab_size) {
00100             ng_end = 
00101               get_full_index(arpa_ng->ind[length_exists-1][ng_index[length_exists-1]+1],
00102                              arpa_ng->ptr_table[length_exists-1],
00103                              arpa_ng->ptr_table_size[length_exists-1],
00104                              ng_index[length_exists-1]+1)-1;
00105           }
00106           else {
00107             ng_end = arpa_ng->num_kgrams[1];
00108           }
00109         }
00110         else {
00111           if (ng_index[length_exists-1] < 
00112               arpa_ng->num_kgrams[length_exists-1]-1) {
00113             ng_end = 
00114               get_full_index(arpa_ng->ind[length_exists-1][ng_index[length_exists-1]+1],
00115                              arpa_ng->ptr_table[length_exists-1],
00116                              arpa_ng->ptr_table_size[length_exists-1],
00117                              ng_index[length_exists-1]+1)-1;
00118           }
00119           else {
00120             ng_end = arpa_ng->num_kgrams[length_exists];
00121           }
00122         } 
00123 
00124         while (ng_begin <= ng_end) {
00125           ng_middle = ng_begin + ((ng_end - ng_begin) >> 1);
00126           if (sought_ngram[length_exists] < 
00127               arpa_ng->word_id[length_exists][ng_middle]) {
00128             ng_end = ng_middle - 1;
00129           }
00130           else {
00131             if (sought_ngram[length_exists] > 
00132                 arpa_ng->word_id[length_exists][ng_middle]) {
00133               ng_begin = ng_middle + 1;
00134             }
00135             else {
00136               found = 1;
00137               ng_index[length_exists] = ng_middle;
00138               break;
00139             }
00140           }
00141         }
00142       }
00143 
00144       if (found) {
00145         length_exists++;
00146       }
00147       else {
00148         still_found = 0;
00149       }
00150 
00151     }
00152     if (length_exists == (context_length+1)) {
00153       found_ngram = 1;
00154     }
00155     if (length_exists >= context_length) {
00156       found_context = 1;
00157     }
00158     if (found_context) {
00159       alpha = pow(10.0,arpa_ng->bo_weight[context_length-1][ng_index[context_length-1]]);
00160     }
00161 
00162     if (found_ngram) {
00163       prob = pow(10.0,arpa_ng->probs[context_length][ng_index[context_length]]);
00164       temp_case = 0;
00165     }
00166     else {
00167       arpa_bo_ng_prob(context_length-1,
00168                       &sought_ngram[1],
00169                       arpa_ng,
00170                       verbosity,
00171                       &prob,
00172                       bo_case);
00173 
00174       temp_case = 2;      
00175       if (found_context) {
00176         prob*=alpha;
00177         temp_case=1;
00178       }
00179       
00180       
00181 
00182     }
00183 
00184     /*
00185      * PWP: coding change.  Numbers were previously coded base-3.
00186      * Now base-4, since (int) pow(4,i) == 1 << (2*i)
00187      */
00188     
00189     *p_prob = prob;
00190     *bo_case += temp_case * (1 << (2*(context_length-1)));
00191   
00192 
00193   }
00194 
00195   if (*p_prob > 1.0) {
00196     fprintf(stderr,"Error : P( %d | ",sought_ngram[context_length]);
00197     for (i=0;i<=context_length-1;i++) {
00198       fprintf(stderr,"%d ",sought_ngram[i]);
00199     }
00200     fprintf(stderr,") = %g\n",*p_prob);
00201     exit(1);
00202   }
00203   free(ng_index);
00204 
00205 }

Generated on Tue Dec 21 13:54:44 2004 by doxygen1.2.18