proc lmAccu { langmod text } { upvar $langmod l incr l(cnt) [llength $text] set v [lindex $text 0] for { set x 1 } { $x < [llength $text] } { incr x } { set w [lindex $text $x] ; set q $v,$w if {![info exist l(1,$v)]} {set l(1,$v) 1 ; incr l(m)} else {incr l(1,$v)} if {![info exist l(2,$q)]} {set l(2,$q) 1 ; incr l(b)} else {incr l(2,$q)} set v $w } } proc lmUpdate { langmod } { upvar $langmod l ; set disc 0.01 foreach i [array names l] { regexp {(.*),(.*)} $i dummy n v ; regexp {(.*),(.*),(.*)} $i dummy n v w if {$n == 1} {set l(p,$i) [expr log(($l($i)-$disc)/$l(cnt)) /2.30259]} if {$n == 2} {set l(p,$i) [expr log(($l($i)-$disc)/$l(1,$v))/2.30259]} } } proc lmWrite { langmod filename } { upvar $langmod l set f [open $filename w] ; set mlist {} ; set blist {} foreach i [array names l] { if [regexp {p,1,(.*)} $i d v ] { lappend mlist "$v $l($i) -99.9" } if [regexp {p,2,(.*),(.*)} $i d v w] { lappend blist "$v $w $l($i)" } } set mlist [lsort -ascii $mlist] ; set blist [lsort -ascii $blist] puts $f "\\data\\\nngram 1=$l(m)\nngram 2=$l(b)\n\n\\1-grams:" foreach m $mlist { puts $f "[lindex $m 1] [lindex $m 0] [lindex $m 2]" } puts $f "\n\\2-grams:" foreach b $blist { puts $f "[lindex $b 2] [lindex $b 0] [lindex $b 1]" } puts $f "\\end\\" close $f } [DBase db] open ../step1/db.dat ../step1/db.idx -mode r set lm(cnt) 0 ; set lm(m) 1 ; set lm(b) 0 ; set lm(1,</s>) 1 foreach utt [db] { makeArray arr [db get $utt] set text [concat {<s>} $arr(text) {</s>}] puts "$utt $text" lmAccu lm $text } lmUpdate lm lmWrite lm langmod exit