#

# Load the models from homework 5.
# NOTE(review): start-up6.tcl presumably creates the objects used below
# (sns, dss, cbs, dssTree, hmm, phonesSet) -- confirm.
source start-up6.tcl
# Scale the scores of the senone set by 1/26.
# NOTE(review): 26 is also the dimension given to "cbs add" further down,
# so this is presumably 1/featureDimension -- confirm.
sns configure -scoreScale [expr 1.0 / 26.0]
# Open the utterance database (utterance.dat / utterance.idx) in mode "r".
set uttDB utterance
DBase db
db open ${uttDB}.dat ${uttDB}.idx -mode "r"
# trainLib.tcl presumably provides readFile and the training helpers
# (writeKeyListViterbi, accuOneIterLabel) -- TODO confirm.
source trainLib.tcl
set trainKeyFile "./train_utt.lst"
# For a quick test, use only the first 20 utterance keys instead:
#set keyList [lrange [readFile $trainKeyFile] 0 19]
set keyList [readFile $trainKeyFile]
# This was homework 5; it is disabled here (change the 0 to 1 to run it again).
if {0} {
writeKeyListViterbi $keyList
}
# addPTreeProc --
#
#   Callback registered as -addProc on the PTrees of dssTree.ptreeSet.
#   It derives the name of a context dependent distribution from the
#   ptree name and the word model name, creates that distribution in
#   dss when the ptree's model set does not know it yet, and returns
#   the distribution name.
#
# Arguments (parsed with itfParseArgv):
#   <ptree>          name of the ptree, expected as <phone>-s<N> or
#                    <phone>-s<N>(<nodeCount>)
#   <wordModelName>  model name prefix, e.g. -i(zh|uen)
#   <tagged phones>  tagged phone sequence (parsed, not used here)
#   <leftContext>    left context index (parsed, not used here)
#   <rightContext>   right context index (parsed, not used here)
#   <count>          count of the phone sequence (parsed, not used here)
#   -model           name of model in DistribSet (parsed, not used here)
#
# Results:
#   Returns <wordModelName>-<stateName><nodeCount>. A ptree name that
#   does not follow the naming convention is reported and mapped to
#   phone SIL, state s1, with an empty node count.
proc addPTreeProc {args} {
    # assumes that the ptree is part of dssTree.ptreeSet
    puts $args
    # itfParseArgv is an argument parser which is part of the Janus library
    itfParseArgv addPTreeProc $args [list [
        list "<ptree>" string {} ptree {} {PTree} ] [
        list "<wordModelName>" string {} wordModelName {} {phoneSeq} ] [
        list "<tagged phones>" string {} tagPhoneSeq {} {tagged phones} ] [
        list "<leftContext>" int {} leftContext {} {left context index} ] [
        list "<rightContext>" int {} rightContext {} {right context index} ] [
        list "<count>" int {} count {} {count of phone seq} ] [
        list "-model" string {} model {} {name of model in DistribSet} ] ]
    # Split the ptree name into phone, state name and optional node suffix.
    # The suffix may have several digits, e.g. a-s1(12), once a leaf has
    # been split more than nine times.
    if {0 == [regexp {(.*)-(s[0-9]+)(\([0-9]+\)|)$} $ptree dummy phone stateName nodeCount ]} {
        puts "ERROR (addPTreeProc): naming convention of ptree $ptree does not match expectations!"
        set stateName "s1"
        set phone "SIL"
        # regexp leaves its match variables untouched on failure, so the
        # suffix must be set here; otherwise building dsName raises an error
        set nodeCount ""
    }
    set dsName ${wordModelName}-${stateName}$nodeCount
    set cbName "$phone-$stateName"
    # only add the distribution if the ptree's model set does not have it yet
    if { [dssTree.ptreeSet:${ptree}.modelSet index $dsName] == -1 } {
        puts "add new model $dsName"
        dss add $dsName $cbName
    }
    return $dsName
}
#
# We add a ptree at each leaf except for silence
#
foreach ds [dss:] {
    if { $ds != "SIL-s1" } {
        # The distribution name has to look like <phone>-s<N>. regexp leaves
        # its variables untouched when it does not match, so skip such a
        # name instead of reusing the phone of the previous distribution.
        if { ![regexp {(.*)-(s[0-9]+)$} $ds dummy phone stateName] } {
            puts "WARNING: distribution $ds does not match <phone>-s<N>; no ptree added"
            continue
        }
        dssTree.ptreeSet add $ds $phone 0 0 -count 1
        # Naming convention: the leaf for distribution X is called LEAF-X
        dssTree:LEAF-$ds configure -ptree [dssTree.ptreeSet index $ds]
    }
}
# Oops, we forgot to set the add proc on the ptrees created above
foreach pt [dssTree.ptreeSet] {
    dssTree.ptreeSet:$pt configure -addProc addPTreeProc
    dssTree.ptreeSet:$pt configure -maxContext 1
}
# We could get this more easily by configuring the class:
# the configuration of the class is copied to every newly created instance
PTree configure -addProc addPTreeProc
PTree configure -maxContext 1 ;# tri-phones; 2 = penta-phones ...
# Build the HMM for one utterance, first without and then with context
# collection switched on in the distribution tree.
set uttKey spk030_utt7
set uttInfo [db get $uttKey]
makeArray uttArray $uttInfo
# NOTE(review): "-opt $" presumably names "$" as the optional word -- confirm.
hmm make $uttArray(TEXT) -opt $
lindex [hmm.stateGraph] 0
# Switch on -ptreeAdd so that the ptrees count the contexts they are asked for
dssTree configure -ptreeAdd 1
hmm make $uttArray(TEXT) -opt $
lindex [hmm.stateGraph] 0
# We see no new models in the state graph, but the first ptree now holds
# the contexts of this utterance:
dssTree.ptreeSet.item(0)
# {{ -i} 0 0 3.000000}
# {{ zh { -i T2} { uen T2}} -1 1 2.000000 -i(zh|uen)-s1}
# {{ { zh WB} { -i T4} d} -1 1 2.000000 -i(zh|d)-s1}
# {{ { sh WB} { -i T4} { ie T4 WB}} -1 1 2.000000 -i(sh|ie)-s1}
# We have to do this for all utterances in the training data:
# building the HMM of each utterance lets the ptrees count its contexts.
foreach uttKey $keyList {
set uttInfo [db get $uttKey]
makeArray uttArray $uttInfo
hmm make $uttArray(TEXT) -opt $
}
# We have collected all the contexts, so do not count any further
dssTree configure -ptreeAdd 0
# Saving the ptrees, distributions and codebooks is disabled here
# (change the 0 to 1 to write the files that are read back further down).
if {0} {
dssTree.ptreeSet write weights/CI-ptree.desc.gz
dss write weights/CI-ptree-dss.desc.gz
dss save weights/CI-ptree-dss.param.gz
cbs write weights/CI-ptree-cbs.desc.gz
cbs save weights/CI-ptree-cbs.param.gz
}
# see the file
# ; -------------------------------------------------------
# ; Name : dssTree
# ; Type : PTreeSet
# ; Date : Wed Jan 18 21:34:40 2006
# ; -------------------------------------------------------
# -i-s1 { -i} 0 0 -count 3.000000
# -i-s1 { s { -i T1} t} -1 1 -count 2.000000 -model -i(s|t)-s1
# -i-s1 { { s WB} { -i T1} f} -1 1 -count 6.000000 -model -i(s|f)-s1
# -i-s1 { { s WB} { -i T1} h} -1 1 -count 1.000000 -model -i(s|h)-s1
# -i-s1 { { s WB} { -i T1} t} -1 1 -count 18.000000 -model -i(s|t)-s1
# -i-s1 { { s WB} { -i T1} { ing T2 WB}} -1 1 -count 2.000000 -model -i(s|ing)-s1
# -i-s1 { zh { -i T1} b} -1 1 -count 6.000000 -model -i(zh|b)-s1
# -i-s1 { s { -i T1} l} -1 1 -count 1.000000 -model -i(s|l)-s1
# -i-s1 { zh { -i T1} h} -1 1 -count 1.000000 -model -i(zh|h)-s1
# -i-s1 { zh { -i T1} t} -1 1 -count 1.000000 -model -i(zh|t)-s1
# -i-s1 { zh { -i T1} l} -1 1 -count 5.000000 -model -i(zh|l)-s1
# -i-s1 { { s WB} { -i T1} l} -1 1 -count 2.000000 -model -i(s|l)-s1
# -i-s1 { { s WB} { -i T1} j} -1 1 -count 2.000000 -model -i(s|j)-s1
# -i-s1 { { s WB} { -i T1} r} -1 1 -count 3.000000 -model -i(s|r)-s1
# -i-s1 { { zh WB} { -i T1} b} -1 1 -count 17.000000 -model -i(zh|b)-s1
# -i-s1 { { zh WB} { -i T1} f} -1 1 -count 5.000000 -model -i(zh|f)-s1
# -i-s1 { { zh WB} { -i T1} h} -1 1 -count 9.000000 -model -i(zh|h)-s1
# -i-s1 { { zh WB} { -i T1} l} -1 1 -count 3.000000 -model -i(zh|l)-s1
# Loading models deletes the accumulators; you have to create new ones
# afterwards!
# NOTE(review): the files read here are only written by the disabled
# "if {0}" block above, so they must already exist on disk.
cbs read weights/CI-ptree-cbs.desc.gz
cbs load weights/CI-ptree-cbs.param.gz
dss read weights/CI-ptree-dss.desc.gz
dss load weights/CI-ptree-dss.param.gz
dssTree.ptreeSet read weights/CI-ptree.desc.gz
# query a model
dssTree.ptreeSet.item(0) get { z { -i T5 WB} { ch WB}} -1 1
# 1.000000 142923 -i(z|ch)-s1
# query a non-existing context
dssTree.ptreeSet.item(0) get { SIL { -i T5 WB} { ch WB}} -1 1
# Nothing is returned. So if the ptree is asked to return a
# context and there is no entry available, the distribTree
# will use the model attached to the tree node.
# We train only the mixture weights; we could also train the Gaussians.
# This training pass is disabled; its results are read from disk below.
if {0} {
dss createAccus
cbs createAccus ;# the statistics for the mixture are only collected if
;# the codebook accus are allocated as well
accuOneIterLabel $keyList 1
cbs freeAccus ;# we only want to update the mixture weights,
;# therefore we remove the codebook accus
sns update
dss write weights/CI-ptree1-dss.desc.gz
dss save weights/CI-ptree1-dss.param.gz
cbs write weights/CI-ptree1-cbs.desc.gz
cbs save weights/CI-ptree1-cbs.param.gz
}
# Load the distributions and codebooks of the training pass above
dss read weights/CI-ptree1-dss.desc.gz
dss load weights/CI-ptree1-dss.param.gz
cbs read weights/CI-ptree1-cbs.desc.gz
cbs load weights/CI-ptree1-cbs.param.gz
#
# Training of the mixture weights is done!
#
#
# We will build a decision tree top down with splitting questions.
# The approach is very similar to "id3".
# First we need questions to split the models/data.
# We already used questions to build a context independent DistribTree in session 4.
# The questions there were also stored in a sub-object of type QuestionSet.
QuestionSet qs phonesSet:PHONES phonesSet tags -padPhone [phonesSet:PHONES index @]
# We collected contexts of width 1, so we only ask about positions in [-1..1].
# Let us ask about the word boundary tag:
qs add "0=WB" ;# is the center phoneme at a word boundary?
qs add "-1=WB"
qs add "+1=WB"
qs add "-1=WB 0=WB +1=WB"
# Question: Given our dictionary, when can this become true?
# Task: Check the ptree description for examples which make this question become true.
qs add "-1=INITIALS"
qs add "+1=INITIALS"
# Question: Why does it make no sense to ask "0=INITIALS"?
# We allow questions about single phonemes (and tags):
# four questions per phone, for the left and the right neighbour,
# each without and with the word boundary tag.
foreach phone [phonesSet:PHONES] {
qs add "-1=$phone"
qs add "+1=$phone"
qs add "-1=$phone -1=WB"
qs add "+1=$phone +1=WB"
}
# Task: Extend the question set with useful phoneme classes
# that will be used for questions (homework). You can find some
# here
# Remember session 4. These are OR questions (examples):
qs add "0=T1 | 0=T2"
qs add "0=T1 | 0=T3"
# Question: Why is it not necessary to include complementary questions?
# TASK: Add more questions about the tone.
#
# Build the context dependent distribution tree
#
# Okay, for our example we already have 251 questions.
# Let us find the best question that would split ang-s1.
# But first we want to make sure that enough data falls into the new leaves
# to train an acoustic model (this information is configured in the DistribSet).
dss configure -minCount 800 ;# we don't have a large data base
dssTree question -help
# Options of 'question' are:
# <node> root node
# -questionSet question set (QuestionSet:)
# -minCount minimum count (float:0.000000)
set splitQuestion [dssTree question LEAF-ang-s1 -questionSet qs -minCount 800]
# {+1=z} 3.985645e+27
# let us try another one
set splitQuestion [dssTree question LEAF-a-s1 -questionSet qs -minCount 800]
# {+1=d} 5.482785e+27
# The higher the score the better, so we want to split LEAF-a-s1
# Split LEAF-a-s1 with one question and inspect the two new nodes.
dssTree split -help
# Options of 'split' are:
# <node> node
# <question> question (string:"NULL")
# <noNode> NO successor node (string:"-")
# <yesNode> YES successor node (string:"-")
# <undefNode> UNDEF successor node (string:"-")
# -minCount minimum count (float:0.000000)
dssTree:LEAF-a-s1 configure
#{-name LEAF-a-s1} {-question 0} {-no -1} {-yes -1} {-undef -1} {-model 8} {-ptree 7}
#
# The split method adds new PTrees to the PTreeSet.
# This invokes the addPTreeProc!
#
# NOTE(review): the question search above reported {+1=d} as the best
# question for LEAF-a-s1, but "+1=z" is used here; the sample outputs
# shown below belong to the "+1=z" split.
dssTree split LEAF-a-s1 "+1=z" a-s1(1) a-s1(2) -
dssTree:LEAF-a-s1 configure
# {-name LEAF-a-s1} {-question 61} {-no 488} {-yes 489} {-undef -1} {-model 8} {-ptree -1}
dssTree.questionSet.item(61)
# {+1=z} {
# {{ { +1 z}} {}}}
# a node can be addressed by index or by name
dssTree.item(488) configure ; dssTree:a-s1(1) configure
# {-name a-s1(1)} {-question 0} {-no -1} {-yes -1} {-undef -1} {-model -1} {-ptree 239}
dssTree.item(489) configure ; dssTree:a-s1(2) configure
# {-name a-s1(2)} {-question 0} {-no -1} {-yes -1} {-undef -1} {-model -1} {-ptree 240}
# At the moment there is no model attached to the new nodes!
# Question: Can you build an hmm using this tree?
dssTree.ptreeSet.item(239)
# This ptree now stores all the models for which the answer was evaluated to NO
dssTree.ptreeSet.item(240)
# This ptree now stores all the models for which the answer was evaluated to YES
# TASK: Write a procedure that splits the DistribTree up to N nodes, starting from the CI tree (homework details from Stan).
# Let us do some more splits: look for the best question of both new nodes
set splitQuestion [dssTree question a-s1(1) -questionSet qs -minCount 800]
# {-1=m -1=WB} 7.044706e+27
set splitQuestion [dssTree question a-s1(2) -questionSet qs -minCount 800]
# {-1=ch} 2.096479e+25
# a-s1(1) has the higher score, so it is the node that gets split
dssTree split a-s1(1) "-1=m -1=WB" a-s1(3) a-s1(4) -
# After we have split all the nodes we want to split, we save the models
dssTree write weights/CD-dssTree-ptree.desc ;# ptree to indicate that there are still ptrees attached
dssTree.ptreeSet write weights/CD-ptreeSet.desc
#
# We now want to add context dependent codebooks and distributions.
# This is only an example, done for the single node a-s1(4)!!
set pTreeIdx [dssTree:a-s1(4) configure -ptree]
if {$pTreeIdx > -1} {
# we have a ptree and want to add a GMM to this leaf
set refN [cbs:a-s1 configure -refN]
# in practice this needs some more checking
# new codebook with as many reference vectors as the codebook a-s1
cbs add a-s1(4) FEAT $refN 26 DIAGONAL
# NOTE(review): ":=" presumably copies the parameters of cbs:a-s1 -- confirm
cbs:a-s1(4) := cbs:a-s1
dss add a-s1(4) a-s1(4)
dssTree:a-s1(4) configure -model [dssTree.modelSet index a-s1(4)]
}
# This should be done only for non-empty ptrees!!
# Pruning step: drop the mixture weights that are no longer needed.
# Every tree node gets its ptree detached, and only the distributions
# that are still referenced by a node are copied into a new DistribSet.
DistribSet dss2 cbs
foreach nodeName [dssTree:] {
    set treeNode dssTree:$nodeName
    set modelIdx [$treeNode configure -model]
    set ptreeIdx [$treeNode configure -ptree]
    # detach the ptree, if there is one
    if { $ptreeIdx > -1 } {
        $treeNode configure -ptree -1
    }
    # nodes without a model contribute nothing to the new set
    if { !($modelIdx > -1) } {
        continue
    }
    set distribName [dss name $modelIdx]
    set codebookIdx [dss:$distribName configure -cbX]
    dss2 add $distribName [cbs name $codebookIdx]
}
# We are mainly interested in the descriptions of the distribTree,
# the distribution set and the codebook set.
dssTree write weights/CD-dssTree-pruned.desc.gz
dss2 write weights/CD-dss-pruned.desc.gz
cbs write weights/CD-cbs-pruned.desc.gz
cbs save weights/CD-cbs-pruned.param.gz
# Last modified: Wed Jan 11 20:39:52 Eastern Standard Time 2006
# Maintainer: tschaaf@cs.cmu.edu.