Main Page   Compound List   File List   Compound Members   File Members  

compute_discount.c

Go to the documentation of this file.
00001 
00002 
00003 /*=====================================================================
00004                 =======   COPYRIGHT NOTICE   =======
00005 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00006 Ronald Rosenfeld and Philip Clarkson.
00007 
00008 All rights reserved.
00009 
00010 This software is made available for research purposes only.  It may be
00011 redistributed freely for this purpose, in full or in part, provided
00012 that this entire copyright notice is included on any copies of this
00013 software and applications and derivations thereof.
00014 
00015 This software is provided on an "as is" basis, without warranty of any
00016 kind, either expressed or implied, as to any matter including, but not
00017 limited to warranty of fitness of purpose, or merchantability, or
00018 results obtained from use of this software.
00019 ======================================================================*/
00020 
00021 /* Basically copied from version 1 */
00022 
00023 #include "rr_libs/general.h"
00024 #include "ngram.h"
00025 #include "pc_libs/pc_general.h"
00026 
00031 void compute_gt_discount(int n,
00032                          int            *freq_of_freq,
00033                          int fof_size,
00034                          unsigned short *disc_range,
00035                          int cutoff,
00036                          int verbosity,
00037                          disc_val_t     **discounted_values) {
00038 
00039 
00040   /* Lots of this is lifted straight from V.1 */
00041 
00042   flag done;
00043   int r;
00044   int K;
00045   double common_term;
00046   double first_term;
00047   double *D;
00048 
00049   D = (double *) rr_calloc((*disc_range)+1,sizeof(double));
00050   *discounted_values = D; 
00051 
00052   /* Trap standard things (taken from V.1) */
00053 
00054   if (fof_size == 0) {
00055     return;
00056   }
00057 
00058   if (freq_of_freq[1] == 0) {
00059     pc_message(verbosity,2,"Warning : %d-gram : f-of-f[1] = 0 --> %d-gram discounting is disabled.\n",n,n);
00060     *disc_range=0;
00061     return;
00062   }
00063 
00064   if (*disc_range + 1 > fof_size) {
00065     pc_message(verbosity,2,"Warning : %d-gram : max. recorded f-o-f is only %d\n",n,fof_size);
00066     pc_message(verbosity,2,"%d-gram discounting range is reset to %d.\n",fof_size,n,fof_size-1);
00067     *disc_range = fof_size-1;
00068   }
00069 
00070   done = 0;
00071 
00072   while (!done) {
00073     if (*disc_range == 0) {
00074       pc_message(verbosity,2,"Warning : %d-gram : Discounting is disabled.\n",n);
00075       return;
00076     }
00077 
00078     if (*disc_range == 1) {
00079       /* special treatment for 1gram if there is a zeroton count: */
00080       if ((n==1) && freq_of_freq[0]>0) {
00081         D[1] = freq_of_freq[1] / ((float) (freq_of_freq[1] + freq_of_freq[0]));
00082         pc_message(verbosity,2,"Warning : %d-gram : Discounting range is 1; setting P(zeroton)=P(singleton).\nDiscounted value : %.2f\n",n,D[1]);
00083         return;
00084       }
00085       else {
00086         pc_message(verbosity,2,"Warning : %d-gram : Discounting range of 1 is equivalent to excluding \nsingletons.\n",n);
00087 
00088       }
00089     }
00090 
00091     K = *disc_range;
00092     common_term = ((double) (K+1) * freq_of_freq[K+1]) / freq_of_freq[1];
00093     if (common_term<=0.0 || common_term>=1.0) {
00094       pc_message(verbosity,2,"Warning : %d-gram : GT statistics are out of range; lowering cutoff to %d.\n",n,K-1);
00095       (*disc_range)--;
00096     }
00097     else {
00098       for (r=1;r<=K;r++) {
00099         first_term = ((double) ((r+1) * freq_of_freq[r+1]))
00100                              /  (r    * freq_of_freq[r]);
00101         D[r]=(first_term - common_term)/(1.0 - common_term);
00102       }
00103       pc_message(verbosity,3,"%d-gram : cutoff = %d, discounted values:",n,K);
00104       for (r=1;r<=K;r++) {
00105         pc_message(verbosity,3," %.2f",D[r]);
00106       }
00107       pc_message(verbosity,3,"\n");
00108       done = 1;
00109       for (r=1; r<=K; r++) {
00110         if (D[r]<0 || D[r]>1.0) {
00111           pc_message(verbosity,2,"Warning : %d-gram : Some discount values are out of range;\nlowering discounting range to %d.\n",n,K-1);
00112           (*disc_range)--;
00113           r=K+1;
00114           done = 0;
00115         }
00116       }
00117     }         
00118   }
00119 
00120    for (r=1; r<=MIN(cutoff,K); r++) D[r] = 0.0;
00121 
00122 }
00123 

Generated on Tue Dec 21 13:54:44 2004 by doxygen1.2.18