#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "pc_libs/pc_general.h"
#include "idngram2lm.h"
#include "rr_libs/mips_swap.h"
#include "rr_libs/general.h"
#include "ngram.h"
Go to the source code of this file.
Defines | |
#define | BBO_FILE_VERSION 970314 |
Functions | |
void | write_arpa_lm (ng_t *ng, int verbosity) |
pc_message (verbosity, 1,"ARPA-style%d-gram will be written to%s\n", ng->n, ng->arpa_filename) | |
fprintf (ng->arpa_fp,"#############################################################################\n") | |
fprintf (ng->arpa_fp,"##Copyright(c) 1996, Carnegie Mellon University, Cambridge University,\n") | |
fprintf (ng->arpa_fp,"##Ronald Rosenfeld and Philip Clarkson\n") | |
fprintf (ng->arpa_fp,"=============================================================================\n") | |
fprintf (ng->arpa_fp,"This is a%d-gram language model, based on a vocabulary of%d words,\n", ng->n, ng->vocab_size) | |
fprintf (ng->arpa_fp,"which begins\"%s\",\"%s\",\"%s\"...\n", ng->vocab[1], ng->vocab[2], ng->vocab[3]) | |
if (ng->vocab_type==CLOSED_VOCAB) | |
fprintf (ng->arpa_fp,"\n") | |
fprintf (ng->arpa_fp,"else if(bigram w1, w2 exists) bo_wt_2(w1, w2)*p(wd3|wd2)\n") | |
fprintf (ng->arpa_fp,"else p(wd3|w2)\n") | |
fprintf (ng->arpa_fp,"else bo_wt_1(wd1)*p_1(wd2)\n") | |
fprintf (ng->arpa_fp,"All probs and back-off weights(bo_wt) are given in log10 form.\n") | |
fprintf (ng->arpa_fp,"Data formats:\n") | |
fprintf (ng->arpa_fp,"Beginning of data mark:\\data\\\n") | |
for (i=1;i<=ng->n;i++) | |
fprintf (ng->arpa_fp,"end of data mark:\\end\\\n") | |
fprintf (ng->arpa_fp,"\\data\\\n") | |
fprintf (ng->arpa_fp,"\n\\1-grams:\n") | |
for (i=ng->first_id;i<=ng->vocab_size;i++) | |
free (current_pos) | |
free (end_pos) | |
fprintf (ng->arpa_fp,"\n\\end\\\n") | |
rr_oclose (ng->arpa_fp) | |
void | write_bin_lm (ng_t *ng, int verbosity) |
Variables | |
int * | current_pos = (int *) rr_malloc(ng->n*sizeof(int)) |
int * | end_pos = (int *) rr_malloc(ng->n*sizeof(int)) |
int | i |
int | j |
double | log_10_of_e = 1.0 / log(10.0) |
Format of the .arpabo file:
------------------------------
<header info - ignored by programs>
\data\
ngram 1=4989
ngram 2=835668
ngram 3=12345678

\1-grams:
...
-0.9792 ABC -2.2031
...
log10_uniprob(ZWEIG) ZWEIG log10_alpha(ZWEIG)

\2-grams:
...
-0.8328 ABC DEFG -3.1234
...
log10_bo_biprob(WAS | ZWEIG) ZWEIG WAS log10_bialpha(ZWEIG,WAS)

\3-grams:
...
-0.234 ABCD EFGHI JKL
...

\end\
Definition in file write_lms.c.
|
Definition at line 30 of file write_lms.c. Referenced by write_bin_lm(). |
|
Definition at line 190 of file write_lms.c. References ng_t::alpha_array, ng_t::arpa_fp, ng_t::bo_weight, ng_t::bo_weight4, double_alpha(), ng_t::four_byte_alphas, fprintf(), i, log_10_of_e, ng_t::max_alpha, ng_t::min_alpha, ng_t::n, ng_t::out_of_range_alphas, ng_t::size_of_alpha_array, ng_t::uni_log_probs, ng_t::uni_probs, and ng_t::vocab. |
|
Definition at line 158 of file write_lms.c. References ng_t::arpa_fp, fprintf(), and i. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Referenced by arpa_bo_ng_prob(), bo_ng_prob(), calc_prob_of(), compute_back_off(), compute_perplexity(), increment_context(), load_arpa_lm(), main(), merge_tempfiles(), sih_add(), and validate(). |
|
|
|
Definition at line 90 of file write_lms.c. References ng_t::arpa_fp, CLOSED_VOCAB, and fprintf(). |
|
Referenced by bo_ng_prob(), compute_back_off(), compute_gt_discount(), compute_unigram(), eval(), guess_mem(), main(), merge_tempfiles(), update(), and write_bin_lm(). |
|
Referenced by compute_perplexity(), main(), and write_bin_lm(). |
|
Definition at line 32 of file write_lms.c. References current_pos, end_pos, and verbosity. Referenced by main(). |
|
|
Definition at line 241 of file write_lms.c. Referenced by compute_back_off(), increment_context(), and write_arpa_lm(). |
|
Definition at line 242 of file write_lms.c. Referenced by compute_back_off(), increment_context(), and write_arpa_lm(). |
|
|
Definition at line 73 of file write_lms.c. Referenced by calc_mem_req(), display_stats(), increment_context(), load_arpa_lm(), main(), merge_tempfiles(), pc_intarrayarg(), pc_shortarrayarg(), and write_bin_lm(). |
|
Definition at line 74 of file write_lms.c. Referenced by for(). |