package marytts.tools.newlanguage;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import marytts.cart.CART;
import marytts.cart.io.MaryCARTReader;
import marytts.cart.io.MaryCARTWriter;
import marytts.fst.AlignerTrainer;
import marytts.fst.FSTLookup;
import marytts.fst.TransducerTrie;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.modules.phonemiser.TrainedLTS;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;

/* loaded from: input_file:marytts/tools/newlanguage/LexiconCreator.class */
/**
 * Builds pronunciation resources from a lexicon text file: a finite state
 * transducer (FST) for lexicon lookup and a letter-to-sound (LTS) decision tree.
 *
 * <p>The lexicon is read as UTF-8 text, one entry per line, with fields separated
 * by {@code |} (optionally surrounded by whitespace). The first field is the word,
 * the second its transcription; an optional third field, when present and
 * non-empty, is appended to the word when testing FST lookup.
 *
 * <p>The complete pipeline is run by {@link #createLexicon()}.
 */
public class LexiconCreator {
    /** Character encoding used for reading the lexicon and writing the FST. */
    private static final String ENCODING = "UTF-8";

    /** Regular expression separating the fields of a lexicon line. */
    private static final String FIELD_SEPARATOR_REGEX = "\\s*\\|\\s*";

    /** Number of alignment iterations run before building the FST. */
    private static final int FST_ALIGNMENT_ITERATIONS = 4;

    /** Number of alignment iterations run before training the LTS tree. */
    private static final int LTS_ALIGNMENT_ITERATIONS = 5;

    /** Every n-th lexicon entry is used as a test word. */
    private static final int TEST_WORD_INTERVAL = 100;

    protected Logger logger;
    protected AllophoneSet allophoneSet;
    protected String lexiconFilename;
    protected String fstFilename;
    protected String ltsFilename;
    protected boolean convertToLowercase;
    protected boolean predictStress;
    protected int context;

    /**
     * Creates a lexicon creator with lowercase conversion and stress prediction
     * enabled and a context value of 2.
     *
     * @param allophoneSet the allophone set handed to the LTS trainer and predictor
     * @param lexiconFilename path of the lexicon text file to read
     * @param fstFilename path of the FST file to write
     * @param ltsFilename path of the LTS tree file to write
     */
    public LexiconCreator(AllophoneSet allophoneSet, String lexiconFilename, String fstFilename, String ltsFilename) {
        this(allophoneSet, lexiconFilename, fstFilename, ltsFilename, true, true, 2);
    }

    /**
     * Creates a lexicon creator with explicit LTS training settings.
     *
     * @param allophoneSet the allophone set handed to the LTS trainer and predictor
     * @param lexiconFilename path of the lexicon text file to read
     * @param fstFilename path of the FST file to write
     * @param ltsFilename path of the LTS tree file to write
     * @param convertToLowercase passed through to the LTS trainer
     * @param predictStress passed through to the LTS trainer
     * @param context passed through to the LTS trainer
     */
    public LexiconCreator(AllophoneSet allophoneSet, String lexiconFilename, String fstFilename, String ltsFilename,
            boolean convertToLowercase, boolean predictStress, int context) {
        this.allophoneSet = allophoneSet;
        this.lexiconFilename = lexiconFilename;
        this.fstFilename = fstFilename;
        this.ltsFilename = ltsFilename;
        this.convertToLowercase = convertToLowercase;
        this.predictStress = predictStress;
        this.context = context;
        this.logger = Logger.getLogger("LexiconCreator");
    }

    /**
     * Hook invoked first by {@link #createLexicon()}. Does nothing here;
     * subclasses may override it to produce or preprocess the lexicon file.
     *
     * @throws IOException if an overriding implementation fails on I/O
     */
    protected void prepareLexicon() throws IOException {
    }

    /**
     * Aligns the lexicon entries, enters the alignments into a trie, minimizes it
     * and writes the resulting transducer to {@link #fstFilename}.
     *
     * @throws IOException if the lexicon cannot be read or the FST cannot be written
     */
    protected void compileFST() throws IOException {
        this.logger.info("Compressing into FST:");
        this.logger.info(" - aligning graphemes and allophones...");
        BufferedReader lexiconReader = openLexicon();
        AlignerTrainer aligner;
        try {
            aligner = new AlignerTrainer(false, true);
            aligner.readLexicon(lexiconReader, FIELD_SEPARATOR_REGEX);
        } finally {
            // Close even when reading fails so the file handle is not leaked.
            lexiconReader.close();
        }
        for (int iteration = 1; iteration <= FST_ALIGNMENT_ITERATIONS; iteration++) {
            this.logger.info("     iteration " + iteration);
            aligner.alignIteration();
        }

        this.logger.info(" - entering alignments in trie...");
        TransducerTrie trie = new TransducerTrie();
        int entryCount = aligner.lexiconSize();
        for (int entry = 0; entry < entryCount; entry++) {
            trie.add(aligner.getAlignment(entry));
            trie.add(aligner.getInfoAlignment(entry));
        }

        this.logger.info(" - minimizing trie...");
        trie.computeMinimization();

        this.logger.info(" - writing transducer to disk...");
        DataOutputStream fstOut = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(new File(this.fstFilename))));
        try {
            trie.writeFST(fstOut, ENCODING);
            fstOut.flush();
        } finally {
            fstOut.close();
        }
    }

    /**
     * Looks up a sample of lexicon words in the FST written by
     * {@link #compileFST()} and logs every entry whose lookup result does not
     * match the transcription in the lexicon.
     *
     * <p>For entries with a third field, the word with that field appended must
     * yield the expected transcription as its first result, and the plain word
     * must yield it among its results. For other entries the plain word must
     * yield it as its first result.
     *
     * @throws IOException if the lexicon or the FST cannot be read
     */
    protected void testFST() throws IOException {
        List<String> words = new ArrayList<String>();
        List<String> transcriptions = new ArrayList<String>();
        List<String> infos = new ArrayList<String>();
        loadTestWords(words, transcriptions, infos, TEST_WORD_INTERVAL);
        this.logger.info(" - looking up " + words.size() + " test words...");

        FSTLookup fst = new FSTLookup(this.fstFilename);
        int wordCount = words.size();
        for (int index = 0; index < wordCount; index++) {
            String word = words.get(index);
            String expected = transcriptions.get(index);
            String info = infos.get(index);
            String[] plainResults = fst.lookup(word);

            if (info == null) {
                if (!expected.equals(firstOrNull(plainResults))) {
                    logMismatch(word, plainResults, expected);
                }
                continue;
            }

            String wordWithInfo = word + info;
            String[] infoResults = fst.lookup(wordWithInfo);
            if (!expected.equals(firstOrNull(infoResults))) {
                logMismatch(wordWithInfo, infoResults, expected);
            }
            if (!contains(plainResults, expected)) {
                logMismatch(word, plainResults, expected);
            }
        }
        this.logger.info("...done!\n");
    }

    /**
     * Returns the first element of a lookup result, or {@code null} when the
     * result is {@code null} or empty, so that a failed lookup is reported as a
     * mismatch instead of raising an index error.
     */
    private static String firstOrNull(String[] results) {
        return (results == null || results.length == 0) ? null : results[0];
    }

    /** Tells whether {@code expected} occurs among the lookup results. */
    private static boolean contains(String[] results, String expected) {
        if (results == null) {
            return false;
        }
        for (String result : results) {
            if (expected.equals(result)) {
                return true;
            }
        }
        return false;
    }

    /** Logs one lookup whose result differs from the lexicon transcription. */
    private void logMismatch(String key, String[] results, String expected) {
        this.logger.info("    " + key + " -> " + Arrays.toString(results) + " (expected: " + expected + ")");
    }

    /** Opens the lexicon file as UTF-8 text; the caller must close the reader. */
    private BufferedReader openLexicon() throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(this.lexiconFilename), ENCODING));
    }

    /**
     * Collects every {@code interval}-th usable entry of the lexicon as test data.
     * The three output lists are filled in parallel, one element per sampled entry.
     *
     * <p>Lines with fewer than two fields (for example blank lines) are skipped
     * and do not count towards the interval.
     *
     * @param words receives the first field of each sampled entry
     * @param transcriptions receives the second field of each sampled entry
     * @param infos receives the third field, or {@code null} if absent or empty
     * @param interval sampling distance between collected entries
     * @throws IOException if the lexicon cannot be read
     */
    private void loadTestWords(List<String> words, List<String> transcriptions, List<String> infos, int interval)
            throws IOException {
        BufferedReader lexiconReader = openLexicon();
        try {
            int sinceLastSample = 0;
            String line;
            while ((line = lexiconReader.readLine()) != null) {
                String[] fields = line.split(FIELD_SEPARATOR_REGEX);
                if (fields.length < 2) {
                    // No transcription on this line: nothing to test against.
                    continue;
                }
                sinceLastSample++;
                if (sinceLastSample != interval) {
                    continue;
                }
                boolean hasInfo = fields.length > 2 && fields[2].length() > 0;
                words.add(fields[0]);
                transcriptions.add(fields[1]);
                infos.add(hasInfo ? fields[2] : null);
                sinceLastSample = 0;
            }
        } finally {
            lexiconReader.close();
        }
    }

    /**
     * Trains a letter-to-sound decision tree from the lexicon and saves it to
     * {@link #ltsFilename}.
     *
     * @throws IOException if the lexicon cannot be read or the tree cannot be written
     */
    protected void compileLTS() throws IOException {
        this.logger.info("Training letter-to-sound rules...");
        LTSTrainer trainer = new LTSTrainer(this.allophoneSet, this.convertToLowercase, this.predictStress, this.context);
        BufferedReader lexiconReader = openLexicon();
        try {
            this.logger.info(" - reading lexicon...");
            trainer.readLexicon(lexiconReader, FIELD_SEPARATOR_REGEX);
        } finally {
            lexiconReader.close();
        }

        this.logger.info(" - aligning...");
        for (int iteration = 1; iteration <= LTS_ALIGNMENT_ITERATIONS; iteration++) {
            this.logger.info("     iteration " + iteration);
            trainer.alignIteration();
        }

        this.logger.info(" - training decision tree...");
        // NOTE(review): the meaning of the argument 10 is defined by LTSTrainer.trainTree — confirm there.
        CART tree = trainer.trainTree(10);

        this.logger.info(" - saving...");
        new MaryCARTWriter().dumpMaryCART(tree, this.ltsFilename);
    }

    /**
     * Loads the LTS tree written by {@link #compileLTS()}, predicts the
     * pronunciation of a sample of lexicon words and logs every prediction that
     * differs from the lexicon transcription, followed by a summary count.
     *
     * @throws IOException if the lexicon or the LTS tree cannot be read
     */
    protected void testLTS() throws IOException {
        List<String> words = new ArrayList<String>();
        List<String> transcriptions = new ArrayList<String>();
        // The third field is not used for LTS testing; the list is only a sink.
        loadTestWords(words, transcriptions, new ArrayList<String>(), TEST_WORD_INTERVAL);

        this.logger.info(" - loading LTS rules...");
        TrainedLTS lts = new TrainedLTS(this.allophoneSet, new MaryCARTReader().load(this.ltsFilename));

        this.logger.info(" - looking up " + words.size() + " test words...");
        int wordCount = words.size();
        int identical = 0;
        for (int index = 0; index < wordCount; index++) {
            String word = words.get(index);
            String expected = transcriptions.get(index);
            String predicted = lts.syllabify(lts.predictPronunciation(word));
            if (expected.equals(predicted)) {
                identical++;
            } else {
                this.logger.info("    " + word + " -> " + predicted + " (expected: " + expected + ")");
            }
        }
        this.logger.info("   for " + identical + " out of " + wordCount + " prediction is identical to lexicon entry.");
        this.logger.info("...done!\n");
    }

    /**
     * Runs the whole pipeline: prepare the lexicon, compile and test the FST,
     * then train and test the LTS rules.
     *
     * @throws Exception if any step fails
     */
    public void createLexicon() throws Exception {
        prepareLexicon();
        compileFST();
        testFST();
        // Garbage collection hint between the two stages; presumably to release
        // FST-building memory before LTS training — TODO confirm it is still needed.
        System.gc();
        compileLTS();
        testLTS();
    }

    /**
     * Command line entry point.
     *
     * @param strArr four arguments: the allophone set specification, the lexicon
     *        file to read, the FST file to write and the LTS file to write;
     *        additional arguments are ignored
     * @throws IllegalArgumentException if fewer than four arguments are given
     * @throws Exception if lexicon creation fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 4) {
            throw new IllegalArgumentException(
                    "Expected 4 arguments: <allophone set> <lexicon file> <FST output file> <LTS output file>");
        }
        BasicConfigurator.configure(new ConsoleAppender(new PatternLayout("%d %m\n")));
        new LexiconCreator(AllophoneSet.getAllophoneSet(strArr[0]), strArr[1], strArr[2], strArr[3]).createLexicon();
    }
}
