Main Page   Compound List   File List   Compound Members   File Members  

evallm.h

Go to the documentation of this file.
00001 /*=====================================================================
00002                 =======   COPYRIGHT NOTICE   =======
00003 Copyright (C) 1996, Carnegie Mellon University, Cambridge University,
00004 Ronald Rosenfeld and Philip Clarkson.
00005 
00006 All rights reserved.
00007 
00008 This software is made available for research purposes only.  It may be
00009 redistributed freely for this purpose, in full or in part, provided
00010 that this entire copyright notice is included on any copies of this
00011 software and applications and derivations thereof.
00012 
00013 This software is provided on an "as is" basis, without warranty of any
00014 kind, either expressed or implied, as to any matter including, but not
00015 limited to warranty of fitness of purpose, or merchantability, or
00016 results obtained from use of this software.
00017 ======================================================================*/
00018 
00019 /* Function prototypes for evallm */
00020 
00021 #ifndef _EVALLM_PROTS_
00022 #define _EVALLM_PROTS_
00023 
00024 #include "pc_libs/pc_general.h"
00025 #include "rr_libs/general.h"
00026 #include "ngram.h"
00027 #include "toolkit.h"
00028 
00029 /* Type specification for forced back-off list */
00030 
00039 typedef struct {      /* Element type of the forced back-off list: gen_fb_list() returns fb_info * and calc_prob_of() receives one as fb_list. */
00040   flag backed_off;    /* NOTE(review): presumably set when back-off is forced for this entry -- confirm against gen_fb_list(). */
00041   flag inclusive;     /* NOTE(review): presumably distinguishes inclusive from exclusive forced back-off (cf. the *_inc / *_exc arguments) -- confirm. */
00042 } fb_info;
00043 
00044 typedef float bo_t;    /* Element type of the arpa_lm_t.bo_weight tables. */
00045 typedef float prob_t;  /* Element type of the arpa_lm_t.probs tables. */
00046 
00053 typedef struct {   /* Language-model structure taken by load_arpa_lm() and display_arpa_stats(). NOTE(review): the listing's line numbers skip after several fields, which suggests the per-field doc comments were dropped from this rendering -- consult the original header for the authoritative descriptions. */
00054 
00055   unsigned short n;                /* NOTE(review): presumably the n-gram order of the model -- confirm. */
00057   /* Vocabulary stuff */
00058 
00059   sih_t          *vocab_ht;      /* NOTE(review): presumably a hash table over the vocabulary words -- confirm. */
00060   unsigned short vocab_size;     /* NOTE(review): presumably the number of vocabulary entries; note gen_fb_list() takes vocab_size as int -- confirm. */
00061   char           **vocab;        /* NOTE(review): presumably the vocabulary strings -- confirm indexing base. */
00062   flag           *context_cue;   /* NOTE(review): presumably one flag per vocabulary word marking context cues -- confirm. */
00064   int            no_of_ccs;      /* NOTE(review): presumably the number of context cues ("ccs") -- confirm. */
00066   /* Tree */
00067 
00068   table_size_t   *table_sizes;   /* NOTE(review): presumably one size per n-gram level -- confirm. */
00069   id__t          **word_id;      /* NOTE(review): presumably per-level arrays of word ids -- confirm. */
00071   bo_t           **bo_weight;    /* Tables of bo_t (float) values. NOTE(review): presumably per-level back-off weights -- confirm. */
00072   prob_t         **probs;        /* Tables of prob_t (float) values. NOTE(review): presumably per-level probabilities; log vs. linear scale is not visible here -- confirm. */
00073   index__t       **ind;          /* NOTE(review): presumably per-level indices into the next level of the tree -- confirm. */
00075   /* Index lookup tables */
00076 
00077   ptr_tab_t      **ptr_table;     /* NOTE(review): presumably lookup tables used together with ind -- confirm. */
00079   unsigned short *ptr_table_size; /* NOTE(review): presumably the size of each ptr_table entry -- confirm. */
00081   /* Misc */
00082 
00083   int            *num_kgrams;     /* NOTE(review): presumably the count of k-grams for each k -- confirm. */
00087   unsigned short vocab_type;      /* NOTE(review): meaning of the values is not visible in this header -- confirm. */
00089   unsigned short first_id;        /* NOTE(review): presumably the lowest word id in use -- confirm. */
00092 } arpa_lm_t;
00093 
00094 /* Function prototypes */
00095 
00096 unsigned short num_of_types(int k,      /* NOTE(review): presumably an n-gram level -- confirm. */
00097                             int ind,    /* NOTE(review): presumably an index within that level -- confirm. */
00098                             ng_t *ng);  /* Model to query; the returned count is an unsigned short. */
00099 void decode_bo_case(int bo_case,           /* NOTE(review): presumably the back-off case code reported via the bo_case out-parameter of calc_prob_of() / arpa_bo_ng_prob() -- confirm. */
00100                     int context_length,    /* NOTE(review): presumably the length of the context that was used -- confirm. */
00101                     FILE *annotation_fp);  /* Stream argument. NOTE(review): presumably the decoded description is written here -- confirm. */
00102 
00103 void display_stats(ng_t *ng);  /* ng_t counterpart of display_arpa_stats(). NOTE(review): presumably prints summary statistics for the model -- confirm. */
00104 
00105 void display_arpa_stats(arpa_lm_t *arpa_ng);  /* arpa_lm_t counterpart of display_stats(). NOTE(review): presumably prints summary statistics for the model -- confirm. */
00106 
00107 void load_lm(ng_t *ng,            /* Model structure. NOTE(review): presumably populated from the file -- confirm. */
00108              char *lm_filename);  /* Name of the LM file. NOTE(review): expected on-disk format is not visible here -- confirm. */
00109 
00111 void load_arpa_lm(arpa_lm_t *arpa_lm, /* An LM data structure. NOTE(review): presumably populated from the file -- confirm. */
00112                   char *lm_filename   /* Input: name of the LM file (this comment previously read "LM format"). NOTE(review): presumably an ARPA-format file -- confirm. */
00113                   );
00114                   
00115 
00116 void parse_comline(char *input_line,   /* Command line text. NOTE(review): non-const, so it may be modified in place -- confirm. */
00117                   int *num_of_args,    /* NOTE(review): presumably receives the number of arguments found -- confirm. */
00118                   char **args);        /* NOTE(review): presumably receives the individual arguments; no capacity is passed, so check how callers size this array. */
00119 
00120 void compute_perplexity(ng_t *ng,                     /* NOTE(review): ng and arpa_ng appear to be alternative model representations selected by the arpa_lm flag -- confirm. */
00121                         arpa_lm_t *arpa_ng,           /* See the note on ng. */
00122                         char *text_stream_filename,   /* NOTE(review): presumably the text to evaluate -- confirm. */
00123                         char *probs_stream_filename,  /* NOTE(review): presumably an output file for per-word probabilities -- confirm. */
00124                         char *annotation_filename,    /* NOTE(review): presumably an output file for annotations (cf. decode_bo_case's annotation_fp) -- confirm. */
00125                         char *oov_filename,           /* NOTE(review): presumably an output file for out-of-vocabulary words -- confirm. */
00126                         char *fb_list_filename,       /* Forced back-off list file name; gen_fb_list() and validate() take the same argument. */
00127                         flag backoff_from_unk_inc,    /* The four backoff_from_* flags also appear on gen_fb_list() and validate(). NOTE(review): presumably they force back-off from unknown words / context cues, inclusively or exclusively -- confirm. */
00128                         flag backoff_from_unk_exc,
00129                         flag backoff_from_ccs_inc,
00130                         flag backoff_from_ccs_exc,
00131                         flag arpa_lm,                 /* NOTE(review): presumably non-zero when arpa_ng rather than ng is the active model -- confirm. */
00132                         flag include_unks,            /* NOTE(review): presumably whether unknown words count towards the result -- confirm. */
00133                         double log_base);             /* NOTE(review): presumably the logarithm base used when reporting -- confirm. */
00134 
00135 fb_info *gen_fb_list(sih_t *vocab_ht,              /* The first four parameters mirror the vocabulary fields of arpa_lm_t (vocab_ht, vocab_size, vocab, context_cue). */
00136                      int vocab_size,               /* Note: int here, whereas arpa_lm_t.vocab_size is unsigned short. */
00137                      char **vocab,
00138                      flag *context_cue,
00139                      flag backoff_from_unk_inc,    /* Same four backoff_from_* flags as compute_perplexity() and validate(). */
00140                      flag backoff_from_unk_exc,
00141                      flag backoff_from_ccs_inc,
00142                      flag backoff_from_ccs_exc,
00143                      char *fb_list_filename);      /* Forced back-off list file name. NOTE(review): ownership of the returned fb_info pointer is not visible here -- confirm who frees it. */
00144 
00145 void validate(ng_t *ng,                    /* NOTE(review): ng and arpa_ng appear to be alternative model representations selected by the arpa_lm flag -- confirm. */
00146               arpa_lm_t *arpa_ng,          /* See the note on ng. */
00147               char **words,                /* NOTE(review): presumably the words of the context to validate; no length is passed, so check how callers terminate or size it. */
00148               flag backoff_from_unk_inc,   /* Same four backoff_from_* flags as compute_perplexity() and gen_fb_list(). */
00149               flag backoff_from_unk_exc,
00150               flag backoff_from_ccs_inc,
00151               flag backoff_from_ccs_exc,
00152               flag arpa_lm,                /* NOTE(review): presumably non-zero when arpa_ng rather than ng is the active model -- confirm. */
00153               char *fb_list_filename);     /* Forced back-off list file name. */
00154 
00155 double calc_prob_of(id__t sought_word,           /* Id of the word being looked up. NOTE(review): the returned double is presumably its probability given the context; log vs. linear scale is not visible here -- confirm. */
00156                     id__t *context,              /* Context word ids. */
00157                     int context_length,          /* NOTE(review): presumably the number of ids in context -- confirm. */
00158                     ng_t *ng,                    /* NOTE(review): ng and arpa_ng appear to be alternative model representations selected by the arpa_lm flag -- confirm. */
00159                     arpa_lm_t *arpa_ng,          /* See the note on ng. */
00160                     fb_info *fb_list,            /* Forced back-off list, of the type returned by gen_fb_list(). */
00161                     int *bo_case,                /* NOTE(review): presumably an out-parameter receiving the back-off case (cf. decode_bo_case) -- confirm. */
00162                     int *actual_context_length,  /* NOTE(review): presumably an out-parameter receiving the context length actually used -- confirm. */
00163                     flag arpa_lm);               /* NOTE(review): presumably non-zero when arpa_ng rather than ng is the active model -- confirm. */
00164 
00165 void arpa_bo_ng_prob(int context_length,   /* NOTE(review): presumably the number of context words in sought_ngram -- confirm. */
00166                      id__t *sought_ngram,  /* Word ids of the n-gram to look up. */
00167                      arpa_lm_t *arpa_ng,   /* Model to query. */
00168                      int verbosity,        /* NOTE(review): presumably controls diagnostic output -- confirm. */
00169                      double *p_prob,       /* NOTE(review): presumably an out-parameter receiving the probability (the function itself returns void) -- confirm. */
00170                      int *bo_case);        /* NOTE(review): presumably an out-parameter receiving the back-off case (cf. decode_bo_case) -- confirm. */
00171 
00172 
00173 #endif

Generated on Tue Dec 21 13:54:45 2004 by doxygen 1.2.18