Main Page   Compound List   File List   Compound Members   File Members  

write_lms.c File Reference

Write LM. More...

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "pc_libs/pc_general.h"
#include "idngram2lm.h"
#include "rr_libs/mips_swap.h"
#include "rr_libs/general.h"
#include "ngram.h"

Go to the source code of this file.

Defines

#define BBO_FILE_VERSION   970314

Functions

void write_arpa_lm (ng_t *ng, int verbosity)
 pc_message (verbosity, 1,"ARPA-style%d-gram will be written to%s\n", ng->n, ng->arpa_filename)
 fprintf (ng->arpa_fp,"#############################################################################\n")
 fprintf (ng->arpa_fp,"##Copyright(c) 1996, Carnegie Mellon University, Cambridge University,\n")
 fprintf (ng->arpa_fp,"##Ronald Rosenfeld and Philip Clarkson\n")
 fprintf (ng->arpa_fp,"=============================================================================\n")
 fprintf (ng->arpa_fp,"This is a%d-gram language model, based on a vocabulary of%d words,\n", ng->n, ng->vocab_size)
 fprintf (ng->arpa_fp,"which begins\"%s\",\"%s\",\"%s\"...\n", ng->vocab[1], ng->vocab[2], ng->vocab[3])
 if (ng->vocab_type==CLOSED_VOCAB)
 fprintf (ng->arpa_fp,"\n")
 fprintf (ng->arpa_fp,"else if(bigram w1, w2 exists) bo_wt_2(w1, w2)*p(wd3|wd2)\n")
 fprintf (ng->arpa_fp,"else p(wd3|w2)\n")
 fprintf (ng->arpa_fp,"else bo_wt_1(wd1)*p_1(wd2)\n")
 fprintf (ng->arpa_fp,"All probs and back-off weights(bo_wt) are given in log10 form.\n")
 fprintf (ng->arpa_fp,"Data formats:\n")
 fprintf (ng->arpa_fp,"Beginning of data mark:\\data\\\n")
 for (i=1;i<=ng->n;i++)
 fprintf (ng->arpa_fp,"end of data mark:\\end\\\n")
 fprintf (ng->arpa_fp,"\\data\\\n")
 fprintf (ng->arpa_fp,"\n\\1-grams:\n")
 for (i=ng->first_id;i<=ng->vocab_size;i++)
 free (current_pos)
 free (end_pos)
 fprintf (ng->arpa_fp,"\n\\end\\\n")
 rr_oclose (ng->arpa_fp)
void write_bin_lm (ng_t *ng, int verbosity)

Variables

int * current_pos = (int *) rr_malloc(ng->n*sizeof(int))
int * end_pos = (int *) rr_malloc(ng->n*sizeof(int))
int i
int j
double log_10_of_e = 1.0 / log(10.0)


Detailed Description

Write LM.

Format of the .arpabo file:
------------------------------
<header info - ignored by programs>
\data\
ngram 1=4989
ngram 2=835668
ngram 3=12345678

\1-grams:
...
-0.9792 ABC -2.2031
...
log10_uniprob(ZWEIG) ZWEIG log10_alpha(ZWEIG)

\2-grams:
...
-0.8328 ABC DEFG -3.1234
...
log10_bo_biprob(WAS | ZWEIG) ZWEIG WAS log10_bialpha(ZWEIG,WAS)

\3-grams:
...
-0.234 ABCD EFGHI JKL
...

\end\

Definition in file write_lms.c.


Define Documentation

#define BBO_FILE_VERSION   970314
 

Definition at line 30 of file write_lms.c.

Referenced by write_bin_lm().


Function Documentation

for (i=ng->first_id;i<=ng->vocab_size;i++)
 

Definition at line 190 of file write_lms.c.

References ng_t::alpha_array, ng_t::arpa_fp, ng_t::bo_weight, ng_t::bo_weight4, double_alpha(), ng_t::four_byte_alphas, fprintf(), i, log_10_of_e, ng_t::max_alpha, ng_t::min_alpha, ng_t::n, ng_t::out_of_range_alphas, ng_t::size_of_alpha_array, ng_t::uni_log_probs, ng_t::uni_probs, and ng_t::vocab.

for (i=1;i<=ng->n;i++)
 

Definition at line 158 of file write_lms.c.

References ng_t::arpa_fp, fprintf(), and i.

fprintf (ng->arpa_fp,"\n\\end\\\n")
 

Referenced by arpa_bo_ng_prob(), bo_ng_prob(), compute_perplexity(), compute_unigram(), decode_bo_case(), display_arpa_stats(), display_stats(), eval(), for(), gen_fb_list(), get_vocab_from_vocab_ht(), if(), index2(), load_arpa_lm(), main(), merge_tempfiles(), pc_report_unk_args(), printUsage(), read_wlist_into_siht(), sih_add(), sih_val_read_from_file(), sih_val_write_to_file(), validate(), and write_ngram().

fprintf (ng->arpa_fp,"\n\\1-grams:\n")
 

fprintf (ng->arpa_fp,"\\data\\\n")
 

fprintf (ng->arpa_fp,"end of data mark:\\end\\\n")
 

fprintf (ng->arpa_fp,"Beginning of data mark:\\data\\\n")
 

fprintf (ng->arpa_fp,"Data formats:\n")
 

fprintf (ng->arpa_fp,"All probs and back-off weights(bo_wt) are given in log10 form.\n")
 

fprintf (ng->arpa_fp,"else bo_wt_1(wd1)*p_1(wd2)\n")
 

fprintf (ng->arpa_fp,"else p(wd3|w2)\n")
 

fprintf (ng->arpa_fp,"else if(bigram w1, w2 exists) bo_wt_2(w1, w2)*p(wd3|wd2)\n")
 

fprintf (ng->arpa_fp,"\n")
 

fprintf (ng->arpa_fp,"which begins\"%s\",\"%s\",\"%s\"...\n", ng->vocab[1], ng->vocab[2], ng->vocab[3])
 

fprintf (ng->arpa_fp,"This is a%d-gram language model, based on a vocabulary of%d words,\n", ng->n, ng->vocab_size)
 

fprintf (ng->arpa_fp,"=============================================================================\n")
 

fprintf (ng->arpa_fp,"##Ronald Rosenfeld and Philip Clarkson\n")
 

fprintf (ng->arpa_fp,"##Copyright(c) 1996, Carnegie Mellon University, Cambridge University,\n")
 

fprintf (ng->arpa_fp,"#############################################################################\n")
 

free (end_pos)
 

Referenced by arpa_bo_ng_prob(), bo_ng_prob(), calc_prob_of(), compute_back_off(), compute_perplexity(), increment_context(), load_arpa_lm(), main(), merge_tempfiles(), sih_add(), and validate().

free (current_pos)
 

if (ng->vocab_type==CLOSED_VOCAB)
 

Definition at line 90 of file write_lms.c.

References ng_t::arpa_fp, CLOSED_VOCAB, and fprintf().

pc_message (verbosity, 1,"ARPA-style%d-gram will be written to%s\n", ng->n, ng->arpa_filename)
 

Referenced by bo_ng_prob(), compute_back_off(), compute_gt_discount(), compute_unigram(), eval(), guess_mem(), main(), merge_tempfiles(), update(), and write_bin_lm().

rr_oclose (ng->arpa_fp)
 

Referenced by compute_perplexity(), main(), and write_bin_lm().

void write_arpa_lm (ng_t *ng, int verbosity)
 

Definition at line 32 of file write_lms.c.

References current_pos, end_pos, and verbosity.

Referenced by main().

void write_bin_lm (ng_t *ng, int verbosity)
 

Definition at line 439 of file write_lms.c.

References ng_t::abs_disc_const, ABSOLUTE, ng_t::alpha_array, BBO_FILE_VERSION, ng_t::bin_filename, ng_t::bin_fp, ng_t::bo_weight, ng_t::bo_weight4, bo_weight_t, ng_t::context_cue, ng_t::count, ng_t::count4, count_ind_t, count_t, ng_t::count_table, ng_t::count_table_size, cutoff_t, ng_t::cutoffs, ng_t::disc_range, disc_val_t, ng_t::discounting_method, ng_t::first_id, flag, ng_t::fof_size, ng_t::four_byte_alphas, ng_t::four_byte_counts, four_byte_t, ng_t::freq_of_freq, GOOD_TURING, ng_t::gt_disc_ratio, i, ng_t::ind, index__t, j, ng_t::lin_disc_ratio, LINEAR, ng_t::marg_counts, ng_t::marg_counts4, ng_t::max_alpha, ng_t::min_alpha, ng_t::n, ng_t::n_unigrams, ng_t::no_of_ccs, ng_t::num_kgrams, ng_t::oov_fraction, ng_t::out_of_range_alphas, pc_message(), ptr_tab_t, ng_t::ptr_table, ng_t::ptr_table_size, rr_fwrite(), rr_oclose(), sih_val_write_to_file(), ng_t::size_of_alpha_array, SWAPHALF, SWAPWORD, ng_t::uni_log_probs, ng_t::uni_probs, uni_probs_t, verbosity, ng_t::version, ng_t::vocab_ht, ng_t::vocab_size, ng_t::vocab_type, WITTEN_BELL, ng_t::word_id, and ng_t::zeroton_fraction.

Referenced by main().


Variable Documentation

int * current_pos = (int *) rr_malloc(ng->n*sizeof(int))
 

Definition at line 241 of file write_lms.c.

Referenced by compute_back_off(), increment_context(), and write_arpa_lm().

int * end_pos = (int *) rr_malloc(ng->n*sizeof(int))
 

Definition at line 242 of file write_lms.c.

Referenced by compute_back_off(), increment_context(), and write_arpa_lm().

int i
 

Definition at line 72 of file write_lms.c.

Referenced by arpa_bo_ng_prob(), bo_ng_prob(), calc_mem_req(), calc_prob_of(), cmp_ngram(), compare_ngrams(), compare_ngrams2(), compute_back_off(), compute_perplexity(), compute_unigram(), decode_bo_case(), display_arpa_stats(), display_stats(), for(), gen_fb_list(), get_ngram(), guess_mem(), load_arpa_lm(), load_lm(), main(), merge_tempfiles(), new_hashtable(), pc_doublearg(), pc_flagarg(), pc_intarg(), pc_intarrayarg(), pc_report_unk_args(), pc_shortarrayarg(), pc_stringarg(), print(), procComLine(), rr_fread(), rr_fwrite(), short_alpha(), updateArgs(), validate(), write_bin_lm(), and write_ngram().

int j
 

Definition at line 73 of file write_lms.c.

Referenced by calc_mem_req(), display_stats(), increment_context(), load_arpa_lm(), main(), merge_tempfiles(), pc_intarrayarg(), pc_shortarrayarg(), and write_bin_lm().

double log_10_of_e = 1.0 / log(10.0)
 

Definition at line 74 of file write_lms.c.

Referenced by for().


Generated on Tue Dec 21 13:54:48 2004 by doxygen 1.2.18