package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.StringLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.Distribution;
import edu.stanford.nlp.stats.EquivalenceClassEval;
import edu.stanford.nlp.stats.GeneralizedCounter;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeToBracketProcessor;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.WordCatConstituent;
import edu.stanford.nlp.trees.WordCatEqualityChecker;
import edu.stanford.nlp.trees.WordCatEquivalenceClasser;
import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
import edu.stanford.nlp.trees.international.pennchinese.RadicalMap;
import edu.stanford.nlp.util.Numberer;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InvalidObjectException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.ObjectStreamException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.Writer;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ChineseCharacterBasedLexicon.class */
public class ChineseCharacterBasedLexicon implements Lexicon {
    // Destination for printStats output and for test output in main; main assigns it
    // only when the -out option is given and null-checks it before printing results.
    protected static PrintWriter pw;
    // Penalty magnitude applied by score() and sampleFrom(String); set from -lengthPenalty in main.
    private static double lengthPenalty;
    // Selects which length-penalty formula is applied (cases 1 and 2 are handled; any other
    // value applies no penalty); set from -penaltyType in main.
    private static int penaltyType;
    // Maps a context list (tag followed by preceding symbols; the empty list holds the
    // character prior) to the distribution over the next symbol. Populated by train().
    private Map<List, Distribution> charDistributions;
    // Every symbol counted in the first pass of train(); score() maps anything outside
    // this set to an unknown-character class.
    private Set knownChars;
    // Distribution over POS tags built by train(); sampled by sampleFrom().
    private Distribution POSDistribution;
    // When true, unknownCharClass() returns a radical-based class instead of Symbol.UNKNOWN;
    // enabled by the -rad option in main.
    private static boolean useUnknownCharModel;
    // Number of preceding symbols used as n-gram context.
    private static final int CONTEXT_LENGTH = 2;
    // NOTE(review): initialised outside the visible part of this file — confirm its pattern there.
    protected static final NumberFormat formatter;
    private static final long serialVersionUID = -5357655683145854069L;
    // Decompiler rendering of the compiler-generated assertion switch; guards the
    // explicit AssertionError checks in score() and main().
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/stanford/nlp/parser/lexparser/ChineseCharacterBasedLexicon$Symbol.class */
    /**
     * A unit of the character n-gram model: either a concrete character, an
     * unknown-character class identified by a string, or one of five shared
     * markers ({@link #UNKNOWN}, {@link #DIGIT}, {@link #LETTER},
     * {@link #BEGIN_WORD}, {@link #END_WORD}).
     *
     * <p>Equality and hashing cover the character, the unknown-class string and
     * the type tag. Deserialisation goes through {@code readResolve}, which
     * maps marker types back to the shared constants and interns the rest.
     */
    public static class Symbol implements Serializable {
        private static final int UNKNOWN_TYPE = 0;
        private static final int DIGIT_TYPE = 1;
        private static final int LETTER_TYPE = 2;
        private static final int BEGIN_WORD_TYPE = 3;
        private static final int END_WORD_TYPE = 4;
        private static final int CHAR_TYPE = 5;
        private static final int UNK_CLASS_TYPE = 6;

        /** The wrapped character; only meaningful when {@code type == CHAR_TYPE}. */
        private char ch;
        /** The unknown-class name; only set when {@code type == UNK_CLASS_TYPE}. */
        private String unkClass;
        /** One of the {@code *_TYPE} constants above. */
        int type;

        public static final Symbol UNKNOWN;
        public static final Symbol DIGIT;
        public static final Symbol LETTER;
        public static final Symbol BEGIN_WORD;
        public static final Symbol END_WORD;
        public static Interner interner;
        private static final long serialVersionUID = 8925032621317022510L;
        // Decompiler rendering of the compiler-generated assertion switch.
        static final /* synthetic */ boolean $assertionsDisabled;

        static {
            // Must be assigned first: the Symbol(int) constructor reads it.
            $assertionsDisabled = !ChineseCharacterBasedLexicon.class.desiredAssertionStatus();
            UNKNOWN = new Symbol(UNKNOWN_TYPE);
            DIGIT = new Symbol(DIGIT_TYPE);
            LETTER = new Symbol(LETTER_TYPE);
            BEGIN_WORD = new Symbol(BEGIN_WORD_TYPE);
            END_WORD = new Symbol(END_WORD_TYPE);
            interner = new Interner();
        }

        /** Creates a symbol wrapping the concrete character {@code c}. */
        public Symbol(char c) {
            this.ch = c;
            this.type = CHAR_TYPE;
        }

        /** Creates an unknown-character-class symbol named {@code str}. */
        public Symbol(String str) {
            this.unkClass = str;
            this.type = UNK_CLASS_TYPE;
        }

        /**
         * Creates a symbol of the given type tag. With assertions enabled,
         * passing the character type here is rejected, since that type needs
         * an actual character.
         */
        public Symbol(int i) {
            if (!$assertionsDisabled && i == CHAR_TYPE) {
                throw new AssertionError();
            }
            this.type = i;
        }

        /**
         * Maps a raw character to its model symbol: {@link #DIGIT} when
         * {@link Character#isDigit(char)} holds, {@link #LETTER} when
         * {@link Character#getNumericValue(char)} lies in 10..35 (the range that
         * method reports for Latin letters), and otherwise a fresh character
         * symbol.
         */
        public static Symbol cannonicalSymbol(char c) {
            if (Character.isDigit(c)) {
                return DIGIT;
            }
            int numericValue = Character.getNumericValue(c);
            if (numericValue >= 10 && numericValue <= 35) {
                return LETTER;
            }
            return new Symbol(c);
        }

        /** Returns the wrapped character, or {@code '*'} for any non-character symbol. */
        public char getCh() {
            return this.type == CHAR_TYPE ? this.ch : '*';
        }

        /** Returns the instance that {@link #interner} yields for this symbol. */
        public Symbol intern() {
            return (Symbol) interner.intern(this);
        }

        @Override
        public String toString() {
            if (this.type == CHAR_TYPE) {
                return "[u" + ((int) this.ch) + "]";
            }
            if (this.type == UNK_CLASS_TYPE) {
                return "UNK:" + this.unkClass;
            }
            return Integer.toString(this.type);
        }

        /**
         * Serialization hook: replaces a freshly deserialised instance with the
         * shared marker constant or the interned instance.
         *
         * @throws ObjectStreamException if the stored type tag is not recognised
         */
        private Object readResolve() throws ObjectStreamException {
            switch (this.type) {
                case UNKNOWN_TYPE:
                    return UNKNOWN;
                case DIGIT_TYPE:
                    return DIGIT;
                case LETTER_TYPE:
                    return LETTER;
                case BEGIN_WORD_TYPE:
                    return BEGIN_WORD;
                case END_WORD_TYPE:
                    return END_WORD;
                case CHAR_TYPE:
                case UNK_CLASS_TYPE:
                    return intern();
                default:
                    throw new InvalidObjectException("ILLEGAL VALUE IN SERIALIZED SYMBOL");
            }
        }

        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (!(obj instanceof Symbol)) {
                return false;
            }
            Symbol other = (Symbol) obj;
            if (this.ch != other.ch || this.type != other.type) {
                return false;
            }
            if (this.unkClass == null) {
                return other.unkClass == null;
            }
            return this.unkClass.equals(other.unkClass);
        }

        @Override
        public int hashCode() {
            int unkClassHash = this.unkClass == null ? 0 : this.unkClass.hashCode();
            int result = (29 * this.ch) + unkClassHash;
            return (29 * result) + this.type;
        }
    }

    /**
     * Prints corpus statistics for the given trees: how many characters and
     * tagged words occur exactly once, the tag distribution of the singleton
     * words, the radical distribution of the singleton characters, and the
     * distribution of word lengths.
     *
     * <p>Output goes to {@link #pw}; when {@code pw} has not been set (main
     * only assigns it for {@code -out}) the report is written to standard
     * output instead of failing with a {@code NullPointerException}.
     *
     * @param collection the trees to gather statistics from
     */
    public static void printStats(Collection<Tree> collection) {
        ClassicCounter wordLengthCounter = new ClassicCounter();
        ClassicCounter wordCounter = new ClassicCounter();
        ClassicCounter charCounter = new ClassicCounter();
        int treeCount = 0;
        for (Tree tree : collection) {
            treeCount++;
            List taggedYield = tree.taggedYield(new ArrayList());
            for (Object item : taggedYield) {
                TaggedWord taggedWord = (TaggedWord) item;
                String word = taggedWord.word();
                if (word.equals(Lexicon.BOUNDARY)) {
                    continue;
                }
                wordCounter.incrementCount(taggedWord);
                int length = word.length();
                wordLengthCounter.incrementCount(Integer.valueOf(length));
                for (int c = 0; c < length; c++) {
                    charCounter.incrementCount(Symbol.cannonicalSymbol(word.charAt(c)));
                }
                // Every word also contributes one end-of-word marker.
                charCounter.incrementCount(Symbol.END_WORD);
            }
        }

        // A threshold of 1.5 selects the keys that were counted exactly once.
        Set singletonChars = Counters.keysBelow(charCounter, 1.5d);
        Set singletonWords = Counters.keysBelow(wordCounter, 1.5d);

        ClassicCounter singletonWordTags = new ClassicCounter();
        for (Object singletonWord : singletonWords) {
            singletonWordTags.incrementCount(((TaggedWord) singletonWord).tag());
        }
        Distribution singletonWordTagDist = Distribution.getDistribution(singletonWordTags);

        ClassicCounter singletonCharRadicals = new ClassicCounter();
        for (Object singletonChar : singletonChars) {
            char radical = RadicalMap.getRadical(((Symbol) singletonChar).getCh());
            singletonCharRadicals.incrementCount(Character.valueOf(radical));
        }
        Distribution singletonCharRadicalDist = Distribution.getDistribution(singletonCharRadicals);
        Distribution wordLengthDist = Distribution.getDistribution(wordLengthCounter);

        DecimalFormat percent = new DecimalFormat("##.##%");
        PrintWriter out = pw != null ? pw : new PrintWriter(System.out, true);

        double charTokens = charCounter.totalCount();
        double wordTokens = wordCounter.totalCount();
        // The type ratios must be computed in floating point: dividing two int
        // sizes truncates to 0 (or 1) before the value is ever formatted.
        double singletonCharTypeRatio = (double) singletonChars.size() / charCounter.size();
        double singletonWordTypeRatio = (double) singletonWords.size() / wordCounter.size();

        out.println("There are " + singletonChars.size() + " singleton chars out of " + ((int) charTokens) + " tokens and " + charCounter.size() + " types found in " + treeCount + " trees.");
        out.println("Thus singletonChars comprise " + percent.format(singletonChars.size() / charTokens) + " of tokens and " + percent.format(singletonCharTypeRatio) + " of types.");
        out.println();
        out.println("There are " + singletonWords.size() + " singleton words out of " + ((int) wordTokens) + " tokens and " + wordCounter.size() + " types.");
        out.println("Thus singletonWords comprise " + percent.format(singletonWords.size() / wordTokens) + " of tokens and " + percent.format(singletonWordTypeRatio) + " of types.");
        out.println();
        out.println("Distribution over singleton word POS:");
        out.println(singletonWordTagDist.toString());
        out.println();
        out.println("Distribution over singleton char radicals:");
        out.println(singletonCharRadicalDist.toString());
        out.println();
        out.println("Distribution over word length:");
        out.println(wordLengthDist);
    }

    /**
     * Trains the character n-gram model from the given trees.
     *
     * <p>The method makes two passes. The first counts every canonical
     * character symbol (plus one end-of-word marker per word) and records the
     * symbols seen exactly once. The second collects, per tagged word,
     * n-gram counts keyed by {@code [tag, previous symbol, symbol before that]}
     * contexts of length 1 to {@code CONTEXT_LENGTH + 1}; singleton characters
     * are additionally counted under their unknown-character class, and are
     * replaced by that class when they appear in a context.
     *
     * <p>Finally the tag distribution, the character prior (stored under the
     * empty context) and one distribution per observed context are built, each
     * smoothed towards the distribution of the context with its last element
     * removed.
     *
     * @param collection the training trees
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public void train(Collection<Tree> collection) {
        Numberer tagNumberer = Numberer.getGlobalNumberer("tags");

        Timing.tick("Counting characters...");
        ClassicCounter charCounter = new ClassicCounter();
        for (Tree tree : collection) {
            List leaves = tree.yield(new ArrayList());
            for (int w = 0, numLeaves = leaves.size(); w < numLeaves; w++) {
                String leafWord = ((Label) leaves.get(w)).value();
                if (leafWord.equals(Lexicon.BOUNDARY)) {
                    continue;
                }
                for (int c = 0, len = leafWord.length(); c < len; c++) {
                    charCounter.incrementCount(Symbol.cannonicalSymbol(leafWord.charAt(c)));
                }
                charCounter.incrementCount(Symbol.END_WORD);
            }
        }
        // A threshold of 1.5 selects the symbols that were counted exactly once.
        Set singletons = Counters.keysBelow(charCounter, 1.5d);
        // Snapshot taken before unknown-class symbols are added to charCounter below.
        this.knownChars = new HashSet(charCounter.keySet());

        Timing.tick("Counting nGrams...");
        GeneralizedCounter[] counters = new GeneralizedCounter[CONTEXT_LENGTH + 1];
        for (int order = 0; order <= CONTEXT_LENGTH; order++) {
            counters[order] = new GeneralizedCounter(order + 2);
        }
        ClassicCounter tagCounter = new ClassicCounter();
        ArrayList context = new ArrayList(CONTEXT_LENGTH + 1);
        for (Tree tree : collection) {
            for (TaggedWord taggedWord : tree.taggedYield()) {
                String word = taggedWord.word();
                String tag = taggedWord.tag();
                tagNumberer.number(tag);
                if (word.equals(Lexicon.BOUNDARY)) {
                    continue;
                }
                tagCounter.incrementCount(tag);
                int wordLength = word.length();
                // Position wordLength stands for the end-of-word marker.
                for (int pos = 0; pos <= wordLength; pos++) {
                    Symbol current;
                    Symbol unknownClass = null;
                    context.clear();
                    context.add(tag);
                    if (pos < wordLength) {
                        current = Symbol.cannonicalSymbol(word.charAt(pos));
                        if (singletons.contains(current)) {
                            unknownClass = unknownCharClass(current);
                            charCounter.incrementCount(unknownClass);
                        }
                    } else {
                        current = Symbol.END_WORD;
                    }
                    counters[0].incrementCount(context, current);
                    if (unknownClass != null) {
                        counters[0].incrementCount(context, unknownClass);
                    }
                    for (int back = 1; back <= CONTEXT_LENGTH; back++) {
                        if (pos - back < 0) {
                            context.add(Symbol.BEGIN_WORD);
                            counters[back].incrementCount(context, current);
                            if (unknownClass != null) {
                                counters[back].incrementCount(context, unknownClass);
                            }
                            // Nothing lies before the word start, so stop extending
                            // the context. Without this exit the loop never advances
                            // and keeps appending BEGIN_WORD forever.
                            break;
                        }
                        Symbol previous = Symbol.cannonicalSymbol(word.charAt(pos - back));
                        if (singletons.contains(previous)) {
                            context.add(unknownCharClass(previous));
                        } else {
                            context.add(previous);
                        }
                        counters[back].incrementCount(context, current);
                        if (unknownClass != null) {
                            counters[back].incrementCount(context, unknownClass);
                        }
                    }
                }
            }
        }
        this.POSDistribution = Distribution.getDistribution(tagCounter);

        Timing.tick("Creating character prior distribution...");
        this.charDistributions = new HashMap();
        this.charDistributions.put(Collections.EMPTY_LIST, Distribution.goodTuringSmoothedCounter(charCounter, charCounter.size() + singletons.size()));
        // Lower orders are built first so that each context's backoff prior already exists.
        for (int order = 0; order <= CONTEXT_LENGTH; order++) {
            Set<Map.Entry> entries = counters[order].lowestLevelCounterEntrySet();
            Timing.tick("Creating " + entries.size() + " character " + (order + 1) + "-gram distributions...");
            for (Map.Entry entry : entries) {
                List contextKey = (List) entry.getKey();
                ClassicCounter counts = (ClassicCounter) entry.getValue();
                Distribution prior = this.charDistributions.get(contextKey.subList(0, contextKey.size() - 1));
                this.charDistributions.put(contextKey, Distribution.dynamicCounterWithDirichletPrior(counts, prior, prior.getNumberOfKeys() / 200.0d));
            }
        }
    }

    /**
     * Returns the distribution over POS tags built by {@code train}.
     *
     * @return the tag distribution, or {@code null} if it has not been set yet
     */
    public Distribution getPOSDistribution() {
        return POSDistribution;
    }

    /**
     * Tests whether every character of {@code str} has a
     * {@link Character#getNumericValue(char)} between 10 and 35 inclusive,
     * which is the range that method reports for Latin letters.
     *
     * @param str the string to inspect; must not be {@code null}
     * @return {@code true} if all characters fall in that range (vacuously
     *         {@code true} for the empty string), {@code false} otherwise
     */
    public static boolean isForeign(String str) {
        for (char c : str.toCharArray()) {
            int numericValue = Character.getNumericValue(c);
            boolean inLetterRange = numericValue >= 10 && numericValue <= 35;
            if (!inLetterRange) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the symbol that stands in for an unseen or singleton character:
     * {@link Symbol#UNKNOWN} unless the unknown-character model is enabled, in
     * which case it is the interned class symbol named after the character's
     * radical as reported by {@code RadicalMap.getRadical}.
     */
    private Symbol unknownCharClass(Symbol symbol) {
        if (!useUnknownCharModel) {
            return Symbol.UNKNOWN;
        }
        char radical = RadicalMap.getRadical(symbol.getCh());
        return new Symbol(Character.toString(radical)).intern();
    }

    /**
     * Scores a tagged word as the sum of log-probabilities of its characters
     * (and the end-of-word marker) under the backed-off character n-gram
     * model, minus an optional length penalty chosen by {@code penaltyType}.
     *
     * @param intTaggedWord the word/tag pair to score
     * @param i not used by this implementation
     * @return the penalised log-probability
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public float score(IntTaggedWord intTaggedWord, int i) {
        TaggedWord taggedWord = intTaggedWord.toTaggedWord();
        String word = taggedWord.word();
        String tag = taggedWord.tag();
        if (!$assertionsDisabled && word.equals(Lexicon.BOUNDARY)) {
            throw new AssertionError();
        }

        // Build [END_WORD, last char, ..., first char, BEGIN_WORD, BEGIN_WORD] so that
        // each symbol is immediately followed by its preceding context.
        char[] chars = word.toCharArray();
        int numChars = chars.length;
        ArrayList reversed = new ArrayList(numChars + CONTEXT_LENGTH + 1);
        reversed.add(Symbol.END_WORD);
        for (int pos = numChars - 1; pos >= 0; pos--) {
            Symbol canonical = Symbol.cannonicalSymbol(chars[pos]);
            reversed.add(this.knownChars.contains(canonical) ? canonical : unknownCharClass(canonical));
        }
        reversed.addAll(Collections.nCopies(CONTEXT_LENGTH, Symbol.BEGIN_WORD));

        double logProb = 0.0d;
        int scoredPositions = reversed.size() - CONTEXT_LENGTH;
        for (int pos = 0; pos < scoredPositions; pos++) {
            Symbol predicted = (Symbol) reversed.get(pos);
            // Overwrite the predicted slot with the tag so the window reads
            // [tag, previous symbol, symbol before that].
            reversed.set(pos, tag);
            Distribution dist = getBackedOffDist(reversed.subList(pos, pos + CONTEXT_LENGTH + 1));
            logProb += Math.log(dist.probabilityOf(predicted));
        }

        if (penaltyType == 1) {
            logProb -= (numChars * (numChars + 1)) * (lengthPenalty / 2.0d);
        } else if (penaltyType == 2) {
            logProb -= (numChars - 1) * lengthPenalty;
        }
        return (float) logProb;
    }

    /**
     * Looks up the distribution for the longest prefix of {@code list} that
     * has one, trying prefix lengths from {@code CONTEXT_LENGTH + 1} down to 0.
     *
     * @param list the context; it must hold at least {@code CONTEXT_LENGTH + 1}
     *             elements, otherwise {@code subList} throws
     * @return the distribution stored for the longest matching prefix
     * @throws RuntimeException if not even the empty context has a distribution
     */
    private Distribution getBackedOffDist(List list) {
        int prefixLength = CONTEXT_LENGTH + 1;
        while (prefixLength >= 0) {
            List prefix = list.subList(0, prefixLength);
            if (this.charDistributions.containsKey(prefix)) {
                return this.charDistributions.get(prefix);
            }
            prefixLength--;
        }
        throw new RuntimeException("OOPS... no prior distribution...?");
    }

    /**
     * Generates a random word for the given tag by sampling one symbol at a
     * time from the backed-off character n-gram model until the end-of-word
     * marker is drawn.
     *
     * <p>After each appended character the length penalty may end generation
     * early: with {@code penaltyType == 1} the word is cut when a uniform
     * random draw exceeds {@code lengthPenalty} raised to the current length,
     * with {@code penaltyType == 2} when the draw exceeds {@code lengthPenalty};
     * any other value never cuts the word short.
     *
     * <p>Reconstructed from the bytecode listing the decompiler emitted in
     * place of this method body.
     *
     * @param tag the POS tag to condition on
     * @return the sampled word; non-character symbols appear as {@code '*'}
     */
    public java.lang.String sampleFrom(java.lang.String tag) {
        StringBuilder word = new StringBuilder();

        // Context layout is [tag, previous symbol, symbol before that].
        List context = new ArrayList(CONTEXT_LENGTH + 1);
        context.add(tag);
        for (int i = 0; i < CONTEXT_LENGTH; i++) {
            context.add(Symbol.BEGIN_WORD);
        }

        Symbol next = (Symbol) getBackedOffDist(context).sampleFrom();
        generate:
        while (next != Symbol.END_WORD) {
            word.append(next.getCh());
            switch (penaltyType) {
                case 1:
                    if (Math.random() > Math.pow(lengthPenalty, word.length())) {
                        break generate;
                    }
                    break;
                case 2:
                    if (Math.random() > lengthPenalty) {
                        break generate;
                    }
                    break;
                default:
                    break;
            }
            // Shift the history one slot back, then record the symbol just emitted.
            for (int i = 1; i < CONTEXT_LENGTH; i++) {
                context.set(i + 1, context.get(i));
            }
            context.set(1, next);
            next = (Symbol) getBackedOffDist(context).sampleFrom();
        }
        return word.toString();
    }

    /**
     * Generates a random word by first sampling a tag from the POS
     * distribution and then sampling a word for that tag.
     *
     * @return the sampled word
     */
    public String sampleFrom() {
        String tag = (String) this.POSDistribution.sampleFrom();
        return sampleFrom(tag);
    }

    /**
     * Not supported by this lexicon.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public Iterator<IntTaggedWord> ruleIteratorByWord(int i, int i2) {
        throw new UnsupportedOperationException("ChineseCharacterBasedLexicon has no rule iterator!");
    }

    /**
     * Reports the number of rules held by this lexicon.
     *
     * @return always 0
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public int numRules() {
        return 0;
    }

    /**
     * Intentionally does nothing; the argument is ignored.
     */
    public void tune(List list) {
    }

    /**
     * Estimates the word-length distribution of the model by sampling 10000
     * words and counting their lengths. Prints a dot to standard output after
     * every 1000 samples and a newline at the end.
     *
     * @return the distribution over sampled word lengths
     */
    private Distribution getWordLengthDistribution() {
        ClassicCounter lengthCounter = new ClassicCounter();
        for (int drawn = 1; drawn <= 10000; drawn++) {
            lengthCounter.incrementCount(Integer.valueOf(sampleFrom().length()));
            if (drawn % 1000 == 0) {
                System.out.print(".");
            }
        }
        System.out.println();
        return Distribution.getDistribution(lengthCounter);
    }

    public static void main(String[] strArr) throws IOException {
        Sentence<Word> sentence;
        Tree transformTree;
        MemoryTreebank memoryTreebank;
        MemoryTreebank memoryTreebank2;
        HashMap hashMap = new HashMap();
        hashMap.put("-parser", 3);
        hashMap.put("-lex", 3);
        hashMap.put("-test", 2);
        hashMap.put("-out", 1);
        hashMap.put("-lengthPenalty", 1);
        hashMap.put("-penaltyType", 1);
        hashMap.put("-maxLength", 1);
        hashMap.put("-stats", 2);
        Map<String, String[]> argsToMap = StringUtils.argsToMap(strArr, hashMap);
        boolean containsKey = argsToMap.containsKey("-eval");
        if (argsToMap.containsKey("-out")) {
            pw = new PrintWriter((Writer) new OutputStreamWriter(new FileOutputStream(argsToMap.get("-out")[0]), ChineseTreebankLanguagePack.ENCODING), true);
        }
        System.err.println("ChineseCharacterBasedLexicon called with args:");
        ChineseTreebankParserParams chineseTreebankParserParams = new ChineseTreebankParserParams();
        for (int i = 0; i < strArr.length; i++) {
            chineseTreebankParserParams.setOptionFlag(strArr, i);
            System.err.print(" " + strArr[i]);
        }
        System.err.println();
        Options options = new Options(chineseTreebankParserParams);
        if (argsToMap.containsKey("-stats")) {
            String[] strArr2 = argsToMap.get("-stats");
            MemoryTreebank memoryTreebank3 = options.tlpParams.memoryTreebank();
            memoryTreebank3.loadPath(new File(strArr2[0]), new NumberRangesFileFilter(strArr2[1], false));
            System.err.println("Done reading trees.");
            if (argsToMap.containsKey("-annotate")) {
                memoryTreebank2 = new MemoryTreebank();
                TreeAnnotator treeAnnotator = new TreeAnnotator(chineseTreebankParserParams.headFinder(), chineseTreebankParserParams);
                Iterator<Tree> it = memoryTreebank3.iterator();
                while (it.hasNext()) {
                    memoryTreebank2.add(treeAnnotator.transformTree(it.next()));
                }
                System.err.println("Done annotating trees.");
            } else {
                memoryTreebank2 = memoryTreebank3;
            }
            printStats(memoryTreebank2);
            System.exit(0);
        }
        if (argsToMap.containsKey("-norm")) {
            Test.lengthNormalization = true;
        }
        int parseInt = argsToMap.containsKey("-maxLength") ? Integer.parseInt(argsToMap.get("-maxLength")[0]) : 1000000;
        Test.maxLength = 120;
        boolean containsKey2 = argsToMap.containsKey("-combo");
        if (containsKey2) {
            chineseTreebankParserParams.useCharacterBasedLexicon = true;
            Test.maxSpanForTags = 10;
            options.doDep = false;
            options.dcTags = false;
        }
        if (argsToMap.containsKey("-rad")) {
            useUnknownCharModel = true;
        }
        LexicalizedParser lexicalizedParser = null;
        Lexicon lexicon = null;
        if (argsToMap.containsKey("-parser")) {
            String[] strArr3 = argsToMap.get("-parser");
            if (strArr3.length > 1) {
                lexicalizedParser = new LexicalizedParser(strArr3[0], new NumberRangesFileFilter(strArr3[1], false), options);
                if (strArr3.length == 3) {
                    String str = strArr3[2];
                    System.err.println("Writing parser in serialized format to file " + str + " ");
                    System.err.flush();
                    ObjectOutputStream writeStreamFromString = IOUtils.writeStreamFromString(str);
                    writeStreamFromString.writeObject(lexicalizedParser.parserData());
                    writeStreamFromString.close();
                    System.err.println("done.");
                }
            } else {
                lexicalizedParser = new LexicalizedParser(strArr3[0], options);
            }
            lexicon = lexicalizedParser.getLexicon();
            options = lexicalizedParser.getOp();
            chineseTreebankParserParams = (ChineseTreebankParserParams) options.tlpParams;
        }
        if (argsToMap.containsKey("-lex")) {
            String[] strArr4 = argsToMap.get("-lex");
            if (strArr4.length > 1) {
                lexicon = chineseTreebankParserParams.lex(options.lexOptions);
                MemoryTreebank memoryTreebank4 = options.tlpParams.memoryTreebank();
                memoryTreebank4.loadPath(new File(strArr4[0]), new NumberRangesFileFilter(strArr4[1], false));
                System.err.println("Done reading trees.");
                if (argsToMap.containsKey("-annotate")) {
                    memoryTreebank = new MemoryTreebank();
                    TreeAnnotator treeAnnotator2 = new TreeAnnotator(chineseTreebankParserParams.headFinder(), chineseTreebankParserParams);
                    Iterator<Tree> it2 = memoryTreebank4.iterator();
                    while (it2.hasNext()) {
                        memoryTreebank.add(treeAnnotator2.transformTree(it2.next()));
                    }
                    System.err.println("Done annotating trees.");
                } else {
                    memoryTreebank = memoryTreebank4;
                }
                lexicon.train(memoryTreebank);
                System.err.println("Done training lexicon.");
                if (strArr4.length == 3) {
                    String str2 = strArr4.length == 3 ? strArr4[2] : "parsers/chineseCharLex.ser.gz";
                    System.err.println("Writing lexicon in serialized format to file " + str2 + " ");
                    System.err.flush();
                    ObjectOutputStream writeStreamFromString2 = IOUtils.writeStreamFromString(str2);
                    writeStreamFromString2.writeObject(lexicon);
                    writeStreamFromString2.close();
                    System.err.println("done.");
                }
            } else {
                String str3 = strArr4.length == 1 ? strArr4[0] : "parsers/chineseCharLex.ser.gz";
                System.err.println("Reading Lexicon from file " + str3);
                ObjectInputStream readStreamFromString = IOUtils.readStreamFromString(str3);
                try {
                    lexicon = (Lexicon) readStreamFromString.readObject();
                    readStreamFromString.close();
                } catch (ClassNotFoundException e) {
                    throw new RuntimeException("Bad serialized file: " + str3);
                }
            }
        }
        if (argsToMap.containsKey("-lengthPenalty")) {
            lengthPenalty = Double.parseDouble(argsToMap.get("-lengthPenalty")[0]);
        }
        if (argsToMap.containsKey("-penaltyType")) {
            penaltyType = Integer.parseInt(argsToMap.get("-penaltyType")[0]);
        }
        if (argsToMap.containsKey("-test")) {
            boolean z = chineseTreebankParserParams.segmentMarkov || chineseTreebankParserParams.segmentMaxMatch;
            boolean z2 = lexicalizedParser != null;
            if (!$assertionsDisabled && !z2 && !z) {
                throw new AssertionError();
            }
            WordSegmenter wordSegmenter = z ? (WordSegmenter) lexicon : null;
            String[] strArr5 = argsToMap.get("-test");
            MemoryTreebank memoryTreebank5 = options.tlpParams.memoryTreebank();
            memoryTreebank5.loadPath(new File(strArr5[0]), new NumberRangesFileFilter(strArr5[1], false));
            TreeTransformer subcategoryStripper = options.tlpParams.subcategoryStripper();
            TreeTransformer collinizer = chineseTreebankParserParams.collinizer();
            WordCatEquivalenceClasser wordCatEquivalenceClasser = new WordCatEquivalenceClasser();
            WordCatEqualityChecker wordCatEqualityChecker = new WordCatEqualityChecker();
            EquivalenceClassEval equivalenceClassEval = new EquivalenceClassEval(wordCatEquivalenceClasser, wordCatEqualityChecker, "basic");
            EquivalenceClassEval equivalenceClassEval2 = new EquivalenceClassEval(wordCatEquivalenceClasser, wordCatEqualityChecker, "collinized");
            ArrayList arrayList = new ArrayList(3);
            boolean z3 = false;
            if (z) {
                arrayList.add(WordCatConstituent.wordType);
                if (chineseTreebankParserParams.segmentMarkov && !z2) {
                    arrayList.add(WordCatConstituent.tagType);
                    z3 = true;
                }
            }
            if (z2) {
                arrayList.add(WordCatConstituent.tagType);
                arrayList.add(WordCatConstituent.catType);
                if (containsKey2) {
                    arrayList.add(WordCatConstituent.wordType);
                    z3 = true;
                }
            }
            TreeToBracketProcessor treeToBracketProcessor = new TreeToBracketProcessor(arrayList);
            System.err.println("Testing...");
            Iterator<Tree> it3 = memoryTreebank5.iterator();
            while (it3.hasNext()) {
                Tree firstChild = it3.next().firstChild();
                Sentence<Word> yield = firstChild.yield();
                if (yield.length() > parseInt) {
                    System.err.println("Skipping sentence; too long: " + yield.length());
                } else {
                    System.err.println("Processing sentence; length: " + yield.length());
                    if (z) {
                        StringBuilder sb = new StringBuilder();
                        Iterator<T> it4 = yield.iterator();
                        while (it4.hasNext()) {
                            sb.append(((StringLabel) it4.next()).value());
                        }
                        sentence = wordSegmenter.segmentWords(sb.toString());
                    } else {
                        sentence = yield;
                    }
                    if (z2) {
                        lexicalizedParser.parse(sentence);
                        transformTree = lexicalizedParser.getBestParse();
                        if (transformTree == null) {
                            throw new RuntimeException("PARSER RETURNED NULL!!!");
                        }
                    } else {
                        transformTree = subcategoryStripper.transformTree(Trees.toFlatTree(sentence));
                    }
                    if (pw != null) {
                        if (!z2) {
                            Iterator<T> it5 = sentence.iterator();
                            while (true) {
                                pw.print(((Word) it5.next()).word());
                                if (!it5.hasNext()) {
                                    break;
                                } else {
                                    pw.print(" ");
                                }
                            }
                        } else {
                            transformTree.pennPrint(pw);
                        }
                        pw.println();
                    }
                    if (containsKey) {
                        Collection allBrackets = treeToBracketProcessor.allBrackets(transformTree);
                        Collection allBrackets2 = treeToBracketProcessor.allBrackets(firstChild);
                        if (z3) {
                            allBrackets.addAll(treeToBracketProcessor.commonWordTagTypeBrackets(transformTree, firstChild));
                            allBrackets2.addAll(treeToBracketProcessor.commonWordTagTypeBrackets(firstChild, transformTree));
                        }
                        equivalenceClassEval.eval(allBrackets, allBrackets2);
                        System.out.println("\nScores:");
                        equivalenceClassEval.displayLast();
                        Tree transformTree2 = collinizer.transformTree(transformTree);
                        Tree transformTree3 = collinizer.transformTree(firstChild);
                        Collection allBrackets3 = treeToBracketProcessor.allBrackets(transformTree2);
                        Collection allBrackets4 = treeToBracketProcessor.allBrackets(transformTree3);
                        if (z3) {
                            allBrackets3.addAll(treeToBracketProcessor.commonWordTagTypeBrackets(transformTree2, transformTree3));
                            allBrackets4.addAll(treeToBracketProcessor.commonWordTagTypeBrackets(transformTree3, transformTree2));
                        }
                        equivalenceClassEval2.eval(allBrackets3, allBrackets4);
                        System.out.println("\nCollinized scores:");
                        equivalenceClassEval2.displayLast();
                        System.out.println();
                    }
                }
            }
            if (containsKey) {
                equivalenceClassEval.display();
                System.out.println();
                equivalenceClassEval2.display();
            }
        }
    }

    /**
     * Unsupported operation: this lexicon does not implement reading its data
     * from a character stream.
     *
     * @param bufferedReader the reader to load from; never read by this implementation
     * @throws IOException never thrown here; the clause is kept from the original signature
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public void readData(BufferedReader bufferedReader) throws IOException {
        // Carry a message so the failure names the operation and class that refused it.
        throw new UnsupportedOperationException(
                "readData(BufferedReader) is not supported by ChineseCharacterBasedLexicon");
    }

    /**
     * Unsupported operation: this lexicon does not implement writing its data
     * to a character stream.
     *
     * @param writer the destination writer; never written to by this implementation
     * @throws IOException never thrown here; the clause is kept from the original signature
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public void writeData(Writer writer) throws IOException {
        // Carry a message so the failure names the operation and class that refused it.
        throw new UnsupportedOperationException(
                "writeData(Writer) is not supported by ChineseCharacterBasedLexicon");
    }

    /**
     * Unsupported operation: this lexicon does not answer known-word queries
     * by integer index.
     *
     * @param i the index being queried; only echoed in the exception message
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public boolean isKnown(int i) {
        // Include the argument so the offending call is identifiable from the stack trace alone.
        throw new UnsupportedOperationException(
                "isKnown(int) is not supported by ChineseCharacterBasedLexicon (index: " + i + ")");
    }

    /**
     * Unsupported operation: this lexicon does not answer known-word queries
     * by string.
     *
     * @param str the word being queried; only echoed in the exception message
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public boolean isKnown(String str) {
        // Include the argument so the offending call is identifiable from the stack trace alone.
        throw new UnsupportedOperationException(
                "isKnown(String) is not supported by ChineseCharacterBasedLexicon (word: " + str + ")");
    }

    /**
     * Returns the unknown word model associated with this lexicon.
     *
     * <p>This implementation always returns {@code null}, so callers must
     * null-check the result before using it.
     *
     * @return always {@code null}
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public UnknownWordModel getUnknownWordModel() {
        return null;
    }

    /**
     * Accepts an unknown word model but does nothing with it.
     *
     * @param unknownWordModel the model offered by the caller; neither stored nor used here
     */
    @Override // edu.stanford.nlp.parser.lexparser.Lexicon
    public void setUnknownWordModel(UnknownWordModel unknownWordModel) {
        // No-op: the supplied model is not stored or used.
    }

    // Static initializer: assigns the class-level defaults. The fields themselves
    // are declared elsewhere in this file.
    static {
        // True when assertions are NOT enabled for this class (negated desiredAssertionStatus()).
        $assertionsDisabled = !ChineseCharacterBasedLexicon.class.desiredAssertionStatus();
        // No output writer by default; the test loop earlier in this file only prints when pw is non-null.
        pw = null;
        // NOTE(review): the meaning and units of this penalty are defined where it is used — confirm there.
        lengthPenalty = 5.0d;
        // NOTE(review): 0 presumably selects the default penalty scheme — confirm against the code that reads it.
        penaltyType = 0;
        useUnknownCharModel = true;
        // Pattern "0.000": at least one integer digit and exactly three fraction digits.
        formatter = new DecimalFormat("0.000");
    }
}
