Main Page   Compound List   File List   Compound Members   File Members  

increment_context.c

Go to the documentation of this file.
00001 
00002 /*=====================================================================
00003                 =======   COPYRIGHT NOTICE   =======
00004 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00005 Ronald Rosenfeld and Philip Clarkson.
00006 
00007 All rights reserved.
00008 
00009 This software is made available for research purposes only.  It may be
00010 redistributed freely for this purpose, in full or in part, provided
00011 that this entire copyright notice is included on any copies of this
00012 software and applications and derivations thereof.
00013 
00014 This software is provided on an "as is" basis, without warranty of any
00015 kind, either expressed or implied, as to any matter including, but not
00016 limited to warranty of fitness of purpose, or merchantability, or
00017 results obtained from use of this software.
00018 ======================================================================*/
00019 
00020 #include "ngram.h"
00021 #include "idngram2lm.h"
00022 #include <stdlib.h>
00023 
00027 void increment_context(ng_t *ng,int k,int verbosity) {
00028   /* Walk tables 0..k; where the table-k successors of a context sum to at least */
00029   /* the context's own count and none is within disc_range[k], add 1 to its counts. */
00030   int current_count;   /* running sum of table-k counts under the current context */
00031   int j;               /* table index used by the store loop */
00032   int current_table;   /* table the walk is currently positioned in */
00033   int *current_pos;    /* per-table cursor; entries 1..k are only ever incremented here */
00034   int *end_pos;        /* per-table index of the last entry under the current parent */
00035   /* ng: model tables; read via return_count/get_full_index, written via store_count. */
00036   flag discounted;     /* set when a successor count is <= ng->disc_range[k] */
00037   /* k: deepest table scanned. NOTE(review): entry 1 of the (k+1)-entry cursor arrays */
00038   /* is always indexed, so k >= 1 appears to be required - confirm at the call sites. */
00039   /* verbosity: accepted but not referenced anywhere in this function. */
00040   /* Scan all the (k+1)-grams (i.e. those in table k). If any of them
00041      are followed by only one (k+2)-gram, and its count is bigger
00042      than the discounting range, then increment the count of the
00043      (k+1)-gram. Technique first introduced by Doug Paul. */
00044   /* NOTE(review): as coded, counts are summed in table k and the +1 goes to tables 0..k-1. */
00045   current_pos = (int *)rr_calloc(k+1,sizeof(int));
00046   end_pos = (int *)rr_calloc(k+1,sizeof(int)); 
00047   /* NOTE(review): entries 1..k are never assigned; presumably rr_calloc zero-fills like calloc - confirm. */
00048   current_count = 0;
00049   discounted = 0;
00050 
00051   /* Outer loop: every table-0 id from first_id up to and including vocab_size. */
00052   for (current_pos[0]=ng->first_id;
00053        current_pos[0]<=ng->vocab_size;
00054        current_pos[0]++) {
00055     /* Only descend for ids whose count in the marg_counts arrays is positive. */
00056     if (return_count(ng->four_byte_counts,
00057                      ng->count_table[0],
00058                      ng->marg_counts,
00059                      ng->marg_counts4,
00060                      current_pos[0]) > 0) {
00061 
00062       current_table = 1;
00063       /* Last table-1 entry for this id (presumably one before the next id's first entry). */
00064       if (current_pos[0] == ng->vocab_size) {
00065         end_pos[1] = ng->num_kgrams[1]-1;
00066       }
00067       else {
00068         end_pos[1] = get_full_index(ng->ind[0][current_pos[0]+1],
00069                                     ng->ptr_table[0],
00070                                     ng->ptr_table_size[0],
00071                                     current_pos[0]+1)-1;
00072       }
00073       /* Walk down and back up the tables until the cursor returns to table 0. */
00074       while (current_table > 0) {
00075         /* Deepest table: consume the successors of the current context. */
00076         if (current_table == k) {
00077           
00078           if (current_pos[k] <= end_pos[k]) {
00079             /* Accumulate this successor and flag it if its count is within the discount range. */
00080             current_count += return_count(ng->four_byte_counts,
00081                                           ng->count_table[k],
00082                                           ng->count[k],
00083                                           ng->count4[k],
00084                                           current_pos[k]);
00085 
00086             if (return_count(ng->four_byte_counts,
00087                              ng->count_table[k],
00088                              ng->count[k],
00089                              ng->count4[k],
00090                              current_pos[k]) <= ng->disc_range[k]) {
00091               discounted = 1;
00092             }
00093             current_pos[k]++;
00094           }
00095           /* Successors exhausted: maybe bump the context's counts, then step back up. */
00096           else {
00097             /* k == 1: the context is a table-0 id; its count is read from the marg_counts arrays. */
00098             if (k == 1) {
00099               if (current_count >= return_count(ng->four_byte_counts,
00100                                                 ng->count_table[0],
00101                                                 ng->marg_counts,
00102                                                 ng->marg_counts4,
00103                                                 current_pos[k-1]) 
00104                   && !discounted) {
00105                 /* Store count+1 back for this id. */
00106                 store_count(ng->four_byte_counts,
00107                             ng->count_table[0],
00108                             ng->count_table_size,
00109                             ng->marg_counts,
00110                             ng->marg_counts4,
00111                             current_pos[0],
00112                             return_count(ng->four_byte_counts,
00113                                          ng->count_table[0],
00114                                          ng->marg_counts,
00115                                          ng->marg_counts4,
00116                                          current_pos[0])+1); 
00117 
00118 
00119               }
00120             }
00121             else {
00122               if ((current_count >= return_count(ng->four_byte_counts,
00123                                                  ng->count_table[k-1],
00124                                                  ng->count[k-1],
00125                                                  ng->count4[k-1],
00126                                                  current_pos[k-1])) && 
00127                   !discounted) {
00128                 /* Store count+1 at the current cursor of each table 1..k-1 ... */
00129                 for (j=1;j<=k-1;j++) {
00130                   store_count(ng->four_byte_counts,
00131                               ng->count_table[j],
00132                               ng->count_table_size,
00133                               ng->count[j],
00134                               ng->count4[j],
00135                               current_pos[j],
00136                               return_count(ng->four_byte_counts,
00137                                            ng->count_table[j],
00138                                            ng->count[j],
00139                                            ng->count4[j],
00140                                            current_pos[j])+1);
00141                 }
00142                 /* ... and at current_pos[0] in the table-0 (marg_counts) arrays. */
00143                 store_count(ng->four_byte_counts,
00144                             ng->count_table[0],
00145                             ng->count_table_size,
00146                             ng->marg_counts,
00147                             ng->marg_counts4,
00148                             current_pos[0],
00149                             return_count(ng->four_byte_counts,
00150                                          ng->count_table[0],
00151                                          ng->marg_counts,
00152                                          ng->marg_counts4,
00153                                          current_pos[0])+1);
00154 
00155               }
00156             }
00157             current_count = 0;  /* start a fresh tally for the next context */
00158             discounted = 0;
00159             current_table--;
00160             if (current_table > 0) {  /* table 0 is advanced by the outer for loop */
00161               current_pos[current_table]++;
00162             }
00163           }
00164         }
00165         else {  /* intermediate table */
00166           if (current_pos[current_table] <= end_pos[current_table]) {
00167             current_table++;  /* entries remain here: descend one table */
00168             if (current_pos[current_table-1] == 
00169                 ng->num_kgrams[current_table-1]-1) {  /* parent is the last entry of its table */
00170               end_pos[current_table] = ng->num_kgrams[current_table]-1;
00171             }
00172             else {
00173               end_pos[current_table] = 
00174                 get_full_index(ng->ind[current_table-1][current_pos[current_table-1]+1],
00175                                ng->ptr_table[current_table-1],
00176                                ng->ptr_table_size[current_table-1],
00177                                current_pos[current_table-1]+1)-1;
00178             }
00179           }
00180           else {
00181             current_table--;  /* nothing left under this parent: step back up */
00182             if (current_table > 0) {
00183               current_pos[current_table]++;
00184             }
00185           }
00186         }
00187       }
00188     }
00189   } 
00190 
00191   free(current_pos);
00192   free(end_pos);
00193 
00194 }
00195 

Generated on Tue Dec 21 13:54:45 2004 by doxygen1.2.18