The script contains two procedures for debugging, cntallo and showallo, which count and display all distinct word-initial allophones. Their results can be compared to the (bug-free) output of the search module.
if 0 {

  How to create a JANUS 3 tree out of a JANUS 2 tree
  ==================================================

  1. Format conversion
  --------------------
  Create a file which mirrors the original JANUS 2 tree: it has one
  tree node per line, and the same nodes as the original tree. Refer
  to the JANUS 3 tree file format specification for details.

  2. If you are not using a clustering tree
  -----------------------------------------
  Run fixSTree (below) to assign to every incomplete context the same
  model as if the missing context was SIL (or WB, see fixSTree); save
  the resulting stree.

  3. If you are using a clustering tree
  -------------------------------------
  a) Start up with the raw JANUS 3 tree, run mkheader to create a tree
     which contains subtrees for biphones and monophones; also, modify
     the root nodes of your triphone tree to use the names
     any(any,any)-[bme].
  b) Concatenate the biphones subtree and the triphones tree.
  c) Start the system up with the concatenated tree and run fixSTree;
     save the resulting stree.

  NOTE: don't mind if you get many warnings about undefined model
        indices (these are the biphone models that have yet to be
        defined).

  NOTE: while fixing the tree you will probably get messages about
        incorrigible triphones. You might want to check if all of them
        contain special phonemes (like word boundaries or such). If
        one of the incorrigible triphones consists of regular phones
        only, then something's wrong, complain to Ivica.
}

# ===========================================================
#  The procedure mkheader writes the biphone and monophone
#  subtrees needed to run a system with incomplete contexts.
#
#  phones    list of all the used monophones
#  fileName  name of the file to write (opened with mode "w")
#
#  Every written line has the form
#      <node> {<question>} <succ> <succ> <succ> <model>
#  (see the JANUS 3 tree file format specification for the
#  meaning of the individual columns).
#
#  The output channel is closed even when writing fails; the
#  original error is then re-raised.
#
#  NOTE: The written tree will use "any" as the name of a
#        Phones object that contains all phones. Take a look
#        at your PhonesSet and (if needed) add the "any"
#        Phones.
# ===========================================================

proc mkheader { phones fileName } {
    global errorInfo

    # "-" is appended as an end-of-chain marker:
    #   phones1 = every phone's successor (ends with "-")
    #   phones0 = the real phones only (marker stripped again)
    lappend phones "-"
    set phones1    [lrange $phones 1 end]
    set phones0    [lrange $phones 0 [expr {[llength $phones] - 2}]]
    set firstPhone [lindex $phones 0]

    set fp [open $fileName w]

    set failed [catch {

        # --- root nodes and the left/right "known context?" nodes ---
        foreach root { b m e } {
            puts $fp "ROOT-$root {-1=any} - DoKnowLeft-$root DontKnowLeft-$root -"
        }
        foreach root { b m e } {
            puts $fp "DontKnowLeft-$root {+1=any} - ask_${firstPhone}(unk,any)-$root ask_${firstPhone}(unk,unk)-$root -"
        }
        foreach root { b m e } {
            puts $fp "DoKnowLeft-$root {+1=any} - any(any,any)-$root ask_${firstPhone}(any,unk)-$root -"
        }

        foreach root { b m e } {

            # --- chain of questions about the center phone (one context known) ---
            set thisPhone [lindex $phones 0]
            foreach nextPhone $phones1 {
                puts $fp "ask_${thisPhone}(any,unk)-$root \{0=$thisPhone\} ask_${nextPhone}(any,unk)-$root ${thisPhone}(ask_$firstPhone,any)-$root - -"
                puts $fp "ask_${thisPhone}(unk,any)-$root \{0=$thisPhone\} ask_${nextPhone}(unk,any)-$root ${thisPhone}(any,ask_$firstPhone)-$root - -"
                set thisPhone $nextPhone
            }

            # --- per center phone: chain of questions about the known context ---
            foreach phone $phones0 {
                set thisContext [lindex $phones 0]
                foreach nextContext $phones1 {
                    puts $fp "${phone}(any,ask_$thisContext)-$root \{+1=$thisContext\} ${phone}(any,ask_$nextContext)-$root ${phone}($thisContext,unk)-$root - -"
                    puts $fp "${phone}(ask_$thisContext,any)-$root \{-1=$thisContext\} ${phone}(ask_$nextContext,any)-$root ${phone}(unk,$thisContext)-$root - -"
                    set thisContext $nextContext
                }
            }

            # --- biphone leaves; their model is still "undefd" ---
            foreach phone $phones0 {
                foreach context $phones0 {
                    puts $fp "${phone}($context,unk)-$root \{\} - - - undefd"
                    puts $fp "${phone}(unk,$context)-$root \{\} - - - undefd"
                }
            }

            # --- chain of questions about the center phone (no context known) ---
            set thisPhone [lindex $phones 0]
            foreach nextPhone $phones1 {
                puts $fp "ask_${thisPhone}(unk,unk)-$root \{0=$thisPhone\} ask_${nextPhone}(unk,unk)-$root ${thisPhone}(unk,unk)-$root - -"
                set thisPhone $nextPhone
            }

            # --- monophone leaves; their model is still "undefd" ---
            foreach phone $phones0 {
                puts $fp "${phone}(unk,unk)-$root \{\} - - - undefd"
            }
        }

    } msg]

    if {$failed} {
        # do not leak the channel; keep the original stack trace
        set savedInfo $errorInfo
        catch {close $fp}
        error $msg $savedInfo
    }

    close $fp
}

# ===========================================================
#  fixSTreeNode is a private helper of fixSTree.
#  It looks up the tree node reached from <root> for the
#  incomplete context <partial> (context window <from>..<to>)
#  and sets that node's model to the index found for the
#  completed context <full> (window -1..1). If no index is
#  found for <full>, a message is printed and the node is left
#  untouched.
# ===========================================================

proc fixSTreeNode { tree root partial from to full } {
    # with "-node 1" the result is a list: {index node}
    set node     [lindex [$tree get $root $partial $from $to -node 1] 1]
    set newIndex [$tree get $root $full -1 1]

    if {$newIndex < 0} {
        puts "Can't correct $full"
    } else {
        $tree:$node configure -model $newIndex
    }
}

# ===========================================================
#  fixSTree will replace the given stree-node's models; every
#  node which is the end-node for an incomplete triphone (one
#  or both contexts unknown) will get the model that is used
#  for the triphone where the missing contexts are SIL or WB.
#
#  monophones  command that returns the list of monophones
#              when invoked without arguments
#  tree        the tree object to be fixed
#  senones     not used by this procedure (kept for callers)
# ===========================================================

proc fixSTree { monophones tree senones } {

    set context WB   ;# or SIL (WB for non x-word, SIL for x-word)

    foreach p {b m e} {
        set root ROOT-$p
        foreach x [$monophones] {
            foreach y [$monophones] {
                # right context missing:  x y ?   ->   x y <context>
                fixSTreeNode $tree $root "$x $y" -1 0 "$x $y $context"
                # left context missing:   ? x y   ->   <context> x y
                fixSTreeNode $tree $root "$x $y"  0 1 "$context $x $y"
            }
            # both contexts missing:      ? x ?   ->   <context> x <context>
            fixSTreeNode $tree $root "$x" 0 0 "$context $x $context"
        }
    }
    return
}

# ===========================================================
#  The following procedure counts the number of different
#  allophones that can be at the beginning of a word, given
#  a dictionary and a senone tree.
#
#  For every dictionary entry the first (at most) two phones
#  are looked up under ROOT-b, ROOT-m and ROOT-e; the number
#  of distinct (b,m,e) result triples is returned.
# ===========================================================

proc cntallo { dict stree } {

    set wordN [llength [$dict]]

    for { set wordX 0 } { $wordX < $wordN } { incr wordX } {

        # first phone of the word plus, if present, its right neighbour
        set phones [lrange [$dict.phones name [lindex [$dict.item($wordX)] 1]] 0 1]

        # a single phone has no right context to look at
        if { [llength $phones] == 1 } { set to 0 } else { set to 1 }

        set begX [$stree get ROOT-b $phones 0 $to]
        set midX [$stree get ROOT-m $phones 0 $to]
        set endX [$stree get ROOT-e $phones 0 $to]

        # array keys act as a set of distinct triples
        set allo($begX,$midX,$endX) 1
    }
    return [array size allo]
}

# ===========================================================
#  The following procedure prints out all words together with
#  their initial allophone; can be used to compare to JANUSV2.
#
#  One line "<word> = <beg> <mid> <end>" is written to the
#  channel <file> per dictionary entry; a lookup that yields
#  a negative index is printed as "(none)".
# ===========================================================

proc showallo { dict stree file } {

    set wordN [llength [$dict]]

    for { set wordX 0 } { $wordX < $wordN } { incr wordX } {

        # first phone of the word plus, if present, its right neighbour
        set phones [lrange [$dict.phones name [lindex [$dict.item($wordX)] 1]] 0 1]

        # a single phone has no right context to look at
        if { [llength $phones] == 1 } { set to 0 } else { set to 1 }

        # all three tree lookups first ...
        set indices {}
        foreach root { b m e } {
            lappend indices [$stree get ROOT-$root $phones 0 $to]
        }

        # ... then translate the indices into senone names
        set names {}
        foreach snX $indices {
            if { $snX < 0 } {
                lappend names "(none)"
            } else {
                lappend names [lindex [$stree.senoneSet.item($snX)] 0]
            }
        }

        set word [lindex [$dict.item($wordX)] 0]
        puts $file "$word = [join $names { }]"
    }
}