package edu.stanford.nlp.trees.international.arabic;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.tregex.ParseException;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.Filter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/trees/international/arabic/ArabicTreeNormalizer.class */
/**
 * Tree normalizer for Arabic treebank trees.
 *
 * <p>On top of the normalization inherited from {@link BobChrisTreeNormalizer}, this class
 * <ul>
 *   <li>prunes empty elements and null pronouns (see {@link ArabicEmptyFilter}),</li>
 *   <li>optionally keeps {@code NP-TMP} labels and marks verbs that have a {@code -PRD}
 *       sister,</li>
 *   <li>optionally maps preposition spelling variants onto one form,</li>
 *   <li>wraps leaves that sit directly under a phrasal node in a {@code DUMMYTAG}
 *       pre-terminal, and</li>
 *   <li>makes sure that the returned tree is rooted at {@code ROOT}.</li>
 * </ul>
 */
public class ArabicTreeNormalizer extends BobChrisTreeNormalizer {
    /** When set, any label starting with {@code NP-TMP} is normalized to exactly {@code NP-TMP}. */
    private final boolean retainNPTmp;
    /** When set, {@code X-PRD} labels survive label normalization and PRD verbs get marked. */
    private final boolean markPRDverb;
    /** When set, preposition spelling variants are collapsed (see the preposition helpers). */
    private final boolean collapse3LetterPrepositionVariants;
    /** Never enabled by any constructor of this class; guards the bad-conjunction remapping. */
    private final boolean normalizeConj;
    /** When set, terminal and nonterminal labels are passed through untouched. */
    private final boolean changeNoLabels;
    /** Matches a label that consists of capital letters followed by {@code -PRD}. */
    private final Pattern prdPattern = Pattern.compile("^[A-Z]+-PRD");
    /** Matches a verb (but not VP) directly under a VP that has a sister ending in {@code -PRD}. */
    private final TregexPattern prdVerbPattern;
    private static final boolean escape = false;
    /** Remembers whether the one-time preposition mapping notice has been printed. */
    private boolean warnedPrepositions;
    private static final Collection<String> escapeCharacters = Arrays.asList("/", "*");
    private static final Pattern wrongConjPattern = Pattern.compile("NNP|NO_FUNC|NOFUNC|IN");

    /**
     * Filter used while pruning: rejects the nodes that must be removed from a tree.
     *
     * <p>A node is rejected when it dominates exactly one leaf and is either tagged
     * {@code -NONE-} or is a {@code PRP} whose word is {@code nullp} or {@code _}.
     */
    public static class ArabicEmptyFilter implements Filter<Tree> {
        private static final long serialVersionUID = 7417844982953945964L;

        /**
         * Decides whether a node is kept.
         *
         * @param tree the node to inspect
         * @return {@code false} for empty elements and null pronouns, {@code true} otherwise
         */
        @Override // edu.stanford.nlp.util.Filter
        public boolean accept(Tree tree) {
            Tree[] children = tree.children();
            Label label = tree.label();
            if (label == null || children.length != 1 || !children[0].isLeaf()) {
                return true;
            }
            String tag = label.value();
            if ("-NONE-".equals(tag)) {
                return false;
            }
            if ("PRP".equals(tag)) {
                // The word to test lives on the child leaf. Testing the tag itself (which is
                // known to be "PRP" here) could never match and left null pronouns in place.
                Label wordLabel = children[0].label();
                if (wordLabel != null) {
                    String word = wordLabel.value();
                    return !("nullp".equals(word) || "_".equals(word));
                }
            }
            return true;
        }
    }

    /**
     * Creates a normalizer with every option given explicitly.
     *
     * @param retainNPTmp keep {@code NP-TMP} labels
     * @param markPRDverb keep {@code X-PRD} labels and append {@code -PRDverb} to matching verbs
     * @param changeNoLabels leave all labels untouched in the label normalization methods
     * @param collapse3LetterPrepositionVariants collapse preposition spelling variants
     * @throws RuntimeException if the internal tregex pattern cannot be compiled; the parse
     *     failure is attached as the cause
     */
    public ArabicTreeNormalizer(boolean retainNPTmp, boolean markPRDverb, boolean changeNoLabels,
            boolean collapse3LetterPrepositionVariants) {
        super(new ArabicTreebankLanguagePack());
        this.normalizeConj = false;
        this.warnedPrepositions = false;
        this.retainNPTmp = retainNPTmp;
        this.markPRDverb = markPRDverb;
        this.changeNoLabels = changeNoLabels;
        this.collapse3LetterPrepositionVariants = collapse3LetterPrepositionVariants;
        this.prdVerbPattern = compilePrdVerbPattern();
        this.emptyFilter = new ArabicEmptyFilter();
    }

    /** Same as the four-argument constructor with preposition collapsing switched off. */
    public ArabicTreeNormalizer(boolean retainNPTmp, boolean markPRDverb, boolean changeNoLabels) {
        this(retainNPTmp, markPRDverb, changeNoLabels, false);
    }

    /** Same as the three-argument constructor with {@code changeNoLabels} switched off. */
    public ArabicTreeNormalizer(boolean retainNPTmp, boolean markPRDverb) {
        this(retainNPTmp, markPRDverb, false);
    }

    /** Same as the two-argument constructor with {@code markPRDverb} switched off. */
    public ArabicTreeNormalizer(boolean retainNPTmp) {
        this(retainNPTmp, false);
    }

    /** Creates a normalizer with every option switched off. */
    public ArabicTreeNormalizer() {
        this(false);
    }

    /** Compiles the PRD verb pattern, converting the checked parse failure and keeping its cause. */
    private static TregexPattern compilePrdVerbPattern() {
        try {
            return TregexPattern.compile("/^V[^P]/ > VP $ /-PRD$/=prd");
        } catch (ParseException e) {
            throw new RuntimeException("Unable to compile the PRD verb tregex pattern", e);
        }
    }

    /**
     * Normalizes a nonterminal label.
     *
     * @param str the label to normalize, may be {@code null}
     * @return {@code str} unchanged when labels must not be changed or when PRD marking is on
     *     and the label is of the form {@code X-PRD}; {@code "NP-TMP"} when NP-TMP retention is
     *     on and the label starts with it; otherwise the superclass result
     */
    @Override // edu.stanford.nlp.trees.BobChrisTreeNormalizer, edu.stanford.nlp.trees.TreeNormalizer
    public String normalizeNonterminal(String str) {
        if (this.changeNoLabels) {
            return str;
        }
        if (this.retainNPTmp && str != null && str.startsWith("NP-TMP")) {
            return "NP-TMP";
        }
        if (this.markPRDverb && str != null && this.prdPattern.matcher(str).matches()) {
            return str;
        }
        return super.normalizeNonterminal(str);
    }

    /**
     * Normalizes a terminal label.
     *
     * @param str the word to normalize
     * @return {@code str} unchanged when labels must not be changed, otherwise the superclass
     *     result
     */
    @Override // edu.stanford.nlp.trees.BobChrisTreeNormalizer, edu.stanford.nlp.trees.TreeNormalizer
    public String normalizeTerminal(String str) {
        return this.changeNoLabels ? str : super.normalizeTerminal(str);
    }

    /** Prints the preposition mapping notice to stderr the first time it is called. */
    private void warnIfFirstTimePrep() {
        if (this.warnedPrepositions) {
            return;
        }
        this.warnedPrepositions = true;
        System.err.println("ATBNormalizer: mapping preposition forms: Ely to ElY; <ly, AlY, Aly to <lY; ldy to ldY; Hty to HtY");
    }

    /**
     * Looks up the form that a preposition spelling variant is mapped to.
     *
     * @param word the word found on the preposition leaf, may be {@code null}
     * @return the replacement form, or {@code null} when the word is not a known variant
     */
    private static String collapsedPrepositionForm(String word) {
        if (word == null) {
            return null;
        }
        // Forms written in the same Latin transliteration as the notice printed above.
        if (word.equals("Ely")) {
            return "ElY";
        }
        if (word.equals("<ly") || word.equals("Aly") || word.equals("AlY")) {
            return "<lY";
        }
        if (word.equals("ldy")) {
            return "ldY";
        }
        if (word.equals("Hty")) {
            return "HtY";
        }
        // The same kind of variants written in Arabic script.
        if (word.equals("علي")) {
            return "على";
        }
        if (word.equals("إلي") || word.equals("الي") || word.equals("الى")) {
            return "إلى";
        }
        if (word.equals("لدي")) {
            return "لدى";
        }
        if (word.equals("حتي")) {
            return "حتى";
        }
        return null;
    }

    /**
     * Rewrites the label of a preposition leaf when its word is a known spelling variant.
     *
     * <p>The argument is the word leaf itself, so every comparison is made against the leaf's
     * own value; a leaf has no child to look at.
     *
     * @param tree the leaf below a {@code PREP} or {@code IN} pre-terminal
     */
    private void do3LetterPrepositionVariants(Tree tree) {
        String replacement = collapsedPrepositionForm(tree.value());
        if (replacement != null) {
            warnIfFirstTimePrep();
            tree.label().setValue(replacement);
        }
    }

    /**
     * Normalizes a complete tree.
     *
     * <p>Steps, in order: prune with the empty filter and splice out A-over-A nodes; fix up
     * pre-terminals and wrap bare leaves under phrasal nodes; mark PRD verbs if requested;
     * remap a bad conjunction tag if that option is on; wrap a bare {@code CC}, {@code PUNC}
     * or {@code CONJ} tagged word in {@code FRAG}; finally add a {@code ROOT} node if the top
     * label is not {@code ROOT}.
     *
     * @param tree the tree to normalize
     * @param treeFactory factory used for every node created during normalization
     * @return the normalized tree
     */
    @Override // edu.stanford.nlp.trees.BobChrisTreeNormalizer, edu.stanford.nlp.trees.TreeNormalizer
    public Tree normalizeWholeTree(Tree tree, TreeFactory treeFactory) {
        // NOTE(review): if prune() can return null for a fully empty tree, the chained call
        // below throws; confirm against the Tree API before relying on such input.
        Tree result = tree.prune(this.emptyFilter, treeFactory).spliceOut(this.aOverAFilter, treeFactory);

        Iterator<Tree> nodes = result.iterator();
        while (nodes.hasNext()) {
            Tree node = nodes.next();
            if (node.isPreTerminal()) {
                normalizePreTerminal(node);
            } else if (!node.isLeaf()) {
                wrapBareLeaves(node, treeFactory);
            }
        }

        if (this.markPRDverb) {
            TregexMatcher matcher = this.prdVerbPattern.matcher(result);
            Tree previousMatch = null;
            while (matcher.find()) {
                // Identity comparison on purpose: skip repeated hits on the very same verb node
                // so that the suffix is appended only once per consecutive run of matches.
                if (matcher.getMatch() != previousMatch) {
                    previousMatch = matcher.getMatch();
                    previousMatch.label().setValue(previousMatch.label().value() + "-PRDverb");
                    Tree prdNode = matcher.getNode("prd");
                    prdNode.label().setValue(super.normalizeNonterminal(prdNode.label().value()));
                }
            }
        }

        if (this.normalizeConj && result.isPreTerminal()
                && result.children()[0].label().value().equals("w")
                && wrongConjPattern.matcher(result.label().value()).matches()) {
            System.err.print("ATBNormalizer ERROR: bad CC remapped tree " + result + " to ");
            result.label().setValue("CC");
            System.err.println(result);
        }

        if (result.isPreTerminal()) {
            String tag = result.label().value();
            if ("CC".equals(tag) || "PUNC".equals(tag) || "CONJ".equals(tag)) {
                System.err.println("ATBNormalizer ERROR: bare tagged word: " + result + " being wrapped in FRAG");
                result = treeFactory.newTreeNode("FRAG", Collections.singletonList(result));
            } else {
                System.err.println("ATBNormalizer ERROR: bare tagged word: " + result + ": fix it!!");
            }
        }

        if (!"ROOT".equals(result.label().value())) {
            result = treeFactory.newTreeNode("ROOT", Collections.singletonList(result));
        }
        return result;
    }

    /**
     * Applies the per-tag fixes to one pre-terminal: preposition collapsing, the missing tag
     * report, and the retagging of {@code NO_FUNC} over {@code .} or {@code "} as {@code PUNC}.
     */
    private void normalizePreTerminal(Tree preTerminal) {
        // Read once; the preposition step below only changes the leaf label, never this tag.
        String tag = preTerminal.label().value();
        if (this.collapse3LetterPrepositionVariants && ("PREP".equals(tag) || "IN".equals(tag))) {
            do3LetterPrepositionVariants(preTerminal.firstChild());
        }
        if (tag == null || tag.equals("")) {
            System.err.println("ATBNormalizer ERROR: missing tag: " + preTerminal);
            // A missing tag is only reported; it must not blow up on the comparison below.
            return;
        }
        if (tag.equals("NO_FUNC")) {
            String word = preTerminal.firstChild().label().value();
            if (".".equals(word) || "\"".equals(word)) {
                System.err.println("ArabicTreeNormalizer: changing NO_FUNC tag to PUNC: " + preTerminal);
                preTerminal.label().setValue("PUNC");
            }
        }
    }

    /** Replaces each leaf directly under a phrasal node by a {@code DUMMYTAG} node over it. */
    private static void wrapBareLeaves(Tree phrase, TreeFactory treeFactory) {
        int numChildren = phrase.numChildren();
        ArrayList<Tree> newChildren = new ArrayList<Tree>(numChildren);
        for (int i = 0; i < numChildren; i++) {
            Tree child = phrase.getChild(i);
            if (child.isLeaf()) {
                newChildren.add(treeFactory.newTreeNode("DUMMYTAG", Collections.singletonList(child)));
            } else {
                newChildren.add(child);
            }
        }
        phrase.setChildren(newChildren);
    }
}
