package idx;

import com.lowagie.text.ElementTags;
import com.lowagie.text.html.Markup;
import monq.jfa.AbstractFaAction;
import monq.jfa.CompileDfaException;
import monq.jfa.Dfa;
import monq.jfa.DfaRun;
import monq.jfa.FaAction;
import monq.jfa.Nfa;
import monq.jfa.ReSyntaxException;
import monq.jfa.Xml;
import monq.jfa.actions.Copy;
import monq.jfa.actions.Drop;
import monq.jfa.actions.LowerCase;
import monq.jfa.actions.Run;
import org.apache.batik.util.CSSConstants;
import org.apache.batik.util.SVG12Constants;
import org.apache.batik.util.SVGConstants;
import org.apache.commons.codec.language.bm.Languages;
import pal.util.XMLConstants;
import prefuse.data.io.GraphMLReader;

/* loaded from: input_file:lib/indexing.jar:idx/TextTokenizer.class */
class TextTokenizer {
    // Compiled automaton; assigned exactly once by the static initializer of this class
    // and handed out through getDfa().
    private static Dfa dfa;
    // Character set: slash, dot, backslash, hyphen and space. Not referenced anywhere else in this class.
    public static final String SEP = "/.\\- ";
    // Copy action constructed with argument 2; the static initializer attaches it to the
    // "ABSTRACT TRUNCATED AT n WORDS" pattern.
    // NOTE(review): the meaning of the constructor argument is defined by monq's Copy class — confirm there.
    public static final FaAction COPY1 = new Copy(2);
    // List of very common English words. Judging by the identifier this is presumably the 256 most
    // frequent words of the British National Corpus — not verified here.
    // Not referenced anywhere else in this class.
    // NOTE(review): entries such as CSSConstants.CSS_LARGE_VALUE, SVGConstants.SVG_SET_TAG or
    // GraphMLReader.Tokens.FOR look like decompiler constant substitution for plain string literals;
    // confirm the constant values before relying on the contents of this list.
    private static final String[] BNC_256_MOST_FREQUENT = {"His", "form", "says", "better", "why", "system", "almost", "information", "high", "become", "able", "best", "called", "further", "case", "Well", "home", "having", CSSConstants.CSS_LARGE_VALUE, "early", "already", "help", "things", "One", "public", "No", "That", "point", "life", "looked", "away", "mean", "whether", "place", "taken", "here", "seen", "second", "five", "far", "government", "important", "On", "told", "often", "Oh", "British", "social", "At", "day", "every", "less", "during", "great", "until", "erm", "took", "left", "given", "four", "quite", CSSConstants.CSS_SMALL_VALUE, "man", "around", "When", "really", "since", "find", "end", "rather", "next", "year", "old", "something", SVGConstants.SVG_SET_TAG, "few", "give", "without", "look", SVGConstants.SVG_LOCAL_ATTRIBUTE, Markup.CSS_VALUE_ALWAYS, "within", "does", "long", "different", "another", "came", "while", "part", "thought", "went", "found", "So", ElementTags.NUMBER, "For", "right", "us", "little", "say", "too", "never", "As", "need", "want", "Mr", "What", "each", "against", "per", "off", "well", "use", "put", "under", "same", "come", "might", "both", XMLConstants.YEARS, "going", "used", "own", "good", "er", "last", "take", "work", "still", "three", "must", "before", "If", "way", "how", "You", "There", "We", "go", "even", "make", "They", "down", "through", "did", "many", "back", "think", "much", "those", "got", "now", "know", "our", "being", "made", "most", "me", "between", "because", "where", "get", "people", "after", "She", "these", "see", "such", "new", "than", ElementTags.FIRST, "him", "should", "time", "And", "then", "This", "may", "over", "just", "very", Languages.ANY, "them", "also", "But", "your", "said", "A", SVG12Constants.SVG_MY_ATRIBUTE, "other", "two", "like", "only", "could", "do", "no", "some", "In", "its", "into", "if", "what", "He", "so", "when", SVGConstants.SVG_OUT_VALUE, "up", "there", "about", "who", "It", "more", "can", 
"all", "would", "one", "we", "she", "will", "their", "has", "her", "been", "they", "were", "but", "an", "this", "or", "which", "his", "from", "had", "not", "he", "have", "are", "you", "by", "at", "as", "it", "with", "The", "be", "on", "I", GraphMLReader.Tokens.FOR, "was", "that", "is", "in", "a", "to", "and", "of", "the"};

    /* renamed from: idx.TextTokenizer$1, reason: invalid class name */
    /* loaded from: input_file:lib/indexing.jar:idx/TextTokenizer$1.class */
    /*
     * Empty marker type with no members. Within this class it is used only as the parameter type
     * of the one-argument constructors of DoLowerWord, PushUniprot and SaveUniprot, which the
     * static initializer always calls with null.
     */
    static class AnonymousClass1 {
    }

    /* loaded from: input_file:lib/indexing.jar:idx/TextTokenizer$DoLowerWord.class */
    /**
     * Action for word matches: pushes the matched word and, when not tokenizing a query,
     * also its singular form as computed by {@link TextTokenizer#stripPlural}.
     */
    private static class DoLowerWord extends AbstractFaAction {
        private DoLowerWord() {
        }

        /* Delegates to the no-argument constructor; the marker argument is ignored. */
        DoLowerWord(AnonymousClass1 anonymousClass1) {
            this();
        }

        /**
         * Pushes the text from index {@code i} to the end of {@code stringBuffer} as a token.
         *
         * <p>If the tokenizer is not in query mode and a singular form exists, the singular is
         * pushed first with final argument 0; the word itself is always pushed with final
         * argument 1. The meaning of that final argument is defined by
         * {@code DfaTokenizer.pushToken}, which is not visible here.
         *
         * @param stringBuffer buffer whose tail, starting at {@code i}, holds the matched word
         * @param i start index of the match within {@code stringBuffer}
         * @param dfaRun running automaton; its {@code clientData} must be a {@code DfaTokenizer}
         */
        @Override // monq.jfa.FaAction
        public void invoke(StringBuffer stringBuffer, int i, DfaRun dfaRun) {
            final DfaTokenizer tokenizer = (DfaTokenizer) dfaRun.clientData;
            final int end = stringBuffer.length();

            // Query mode never asks for a singular form at all.
            String singular = null;
            if (!tokenizer.forQuery) {
                singular = TextTokenizer.stripPlural(stringBuffer, i);
            }

            if (singular != null) {
                tokenizer.pushToken(singular, i, end, 0);
            }
            tokenizer.pushToken(stringBuffer.substring(i), i, end, 1);
        }
    }

    /* loaded from: input_file:lib/indexing.jar:idx/TextTokenizer$PushUniprot.class */
    /**
     * Action for the closing {@code z:uniprot} tag: pushes every comma-separated entry of the
     * {@code "ids"} value remembered in the tokenizer's {@code recentUniprot} map.
     */
    private static class PushUniprot extends AbstractFaAction {
        private PushUniprot() {
        }

        /* Delegates to the no-argument constructor; the marker argument is ignored. */
        PushUniprot(AnonymousClass1 anonymousClass1) {
            this();
        }

        /**
         * Reads the {@code "ids"} entry from {@code recentUniprot}, clears the map, and pushes each
         * comma-separated piece as a token with start and end both equal to {@code i} and final
         * argument 0.
         *
         * <p>If no {@code "ids"} entry is present (for example an end tag without a preceding
         * start tag, or a start tag without that attribute) nothing is pushed; the map is still
         * cleared.
         *
         * @param stringBuffer buffer holding the match; not read by this action
         * @param i start index of the match, used as both token offsets
         * @param dfaRun running automaton; its {@code clientData} must be a {@code DfaTokenizer}
         */
        @Override // monq.jfa.FaAction
        public void invoke(StringBuffer stringBuffer, int i, DfaRun dfaRun) {
            DfaTokenizer dfaTokenizer = (DfaTokenizer) dfaRun.clientData;
            String ids = (String) dfaTokenizer.recentUniprot.get("ids");
            dfaTokenizer.recentUniprot.clear();
            if (ids == null) {
                // Nothing was recorded for this element; avoid a NullPointerException below.
                return;
            }

            final int length = ids.length();
            int start = 0;
            while (start < length) {
                int comma = ids.indexOf(',', start);
                if (comma < 0) {
                    // Last entry: runs to the end of the string.
                    comma = length;
                }
                dfaTokenizer.pushToken(ids.substring(start, comma), i, i, 0);
                start = comma + 1;
            }
        }
    }

    /* loaded from: input_file:lib/indexing.jar:idx/TextTokenizer$SaveUniprot.class */
    /**
     * Action for the opening {@code z:uniprot} tag: hands the matched text to
     * {@code Xml.splitElement} so that its parts end up in the tokenizer's
     * {@code recentUniprot} map.
     */
    private static class SaveUniprot extends AbstractFaAction {
        private SaveUniprot() {
        }

        /* Delegates to the no-argument constructor; the marker argument is ignored. */
        SaveUniprot(AnonymousClass1 anonymousClass1) {
            this();
        }

        /**
         * Calls {@code Xml.splitElement(recentUniprot, stringBuffer, i)} on the tokenizer stored
         * in {@code dfaRun.clientData}.
         *
         * @param stringBuffer buffer holding the matched tag starting at {@code i}
         * @param i start index of the match within {@code stringBuffer}
         * @param dfaRun running automaton; its {@code clientData} must be a {@code DfaTokenizer}
         */
        @Override // monq.jfa.FaAction
        public void invoke(StringBuffer stringBuffer, int i, DfaRun dfaRun) {
            final DfaTokenizer tokenizer = (DfaTokenizer) dfaRun.clientData;
            Xml.splitElement(tokenizer.recentUniprot, stringBuffer, i);
        }
    }

    /* Package-private no-op constructor; every other member of this class is static. */
    TextTokenizer() {
    }

    /**
     * Derives a naive English singular for the token that occupies {@code stringBuffer} from
     * index {@code i} to the end of the buffer.
     *
     * <p>Rules, tried in this order on the token only:
     * <ul>
     *   <li>ends with {@code "ies"}: the suffix is replaced by {@code "y"};</li>
     *   <li>ends with {@code "sses"} or {@code "xes"}: the final {@code "es"} is removed;</li>
     *   <li>ends with {@code "ss"}, {@code "ous"}, {@code "sis"} or {@code "tis"}, or does not
     *       end with {@code "s"}: no singular ({@code null});</li>
     *   <li>otherwise the final {@code "s"} is removed.</li>
     * </ul>
     *
     * <p>Only the characters from {@code i} onwards are inspected, so text in front of the token
     * can never take part in a suffix match, and the buffer is never modified. A token that
     * would be reduced to the empty string (the one-letter token {@code "s"}) yields
     * {@code null} instead of an empty singular.
     *
     * <p>JADX note: this method was private in the original class file.
     *
     * @param stringBuffer buffer whose tail holds the token; left unchanged
     * @param i start index of the token within {@code stringBuffer}
     * @return the singular form, or {@code null} if the token is empty or no rule applies
     * @throws StringIndexOutOfBoundsException if {@code i} is negative
     */
    public static String stripPlural(StringBuffer stringBuffer, int i) {
        if (i >= stringBuffer.length()) {
            // Empty token: nothing to strip.
            return null;
        }
        final String token = stringBuffer.substring(i);
        final int tokenLength = token.length();

        final String singular;
        if (token.endsWith("ies")) {
            singular = token.substring(0, tokenLength - 3) + 'y';
        } else if (token.endsWith("sses") || token.endsWith("xes")) {
            singular = token.substring(0, tokenLength - 2);
        } else if (!token.endsWith("s")
                || token.endsWith("ss")
                || token.endsWith("ous")
                || token.endsWith("sis")
                || token.endsWith("tis")) {
            return null;
        } else {
            singular = token.substring(0, tokenLength - 1);
        }

        // Never report an empty stem as a singular form.
        return singular.isEmpty() ? null : singular;
    }

    /**
     * Returns the automaton compiled by this class's static initializer.
     *
     * @return the shared {@code Dfa} instance held in the static field {@code dfa}
     */
    public static Dfa getDfa() {
        return dfa;
    }

    /*
     * Builds the tokenizer automaton once at class-load time: registers every pattern/action
     * pair, lets WordnetExceptions and WordnetVerbs add theirs, then compiles the result with
     * DfaRun.UNMATCHED_COPY and stores it in the static field dfa.
     */
    static {
        try {
            Nfa nfa = new Nfa(Nfa.NOTHING)
                    // All-lowercase words: DoLowerWord, registered with priority 1.
                    .or("[a-z]+", new DoLowerWord(null).setPriority(1))
                    // Mixed-case words: lowercased first, then handled by DoLowerWord.
                    .or("[A-Za-z]+", new Run(LowerCase.LOWERCASE, new DoLowerWord(null)))
                    // Digit runs.
                    .or("[0-9]+", DfaTokenizer.PUSH)
                    // Truncation marker text is handled by the COPY1 action.
                    .or("ABSTRACT +TRUNCATED +AT +[0-9]+ +WORDS", COPY1)
                    // <z:uniprot ...> remembers the element, </z:uniprot> pushes its ids.
                    .or(Xml.STag("z:uniprot"), new SaveUniprot(null))
                    .or(Xml.ETag("z:uniprot"), new PushUniprot(null))
                    // <plain> / </plain> tags and named character entities go to Drop.DROP.
                    .or(Xml.STag("plain"), Drop.DROP)
                    .or(Xml.ETag("plain"), Drop.DROP)
                    .or("&[a-z]+;", Drop.DROP)
                    // Six-character identifiers starting with O, P or Q
                    // (presumably UniProt accession numbers — confirm).
                    .or("[OPQ][0-9][A-Z0-9][A-Z0-9][A-Z0-9][0-9]", DfaTokenizer.PUSH);

            WordnetExceptions.addTo(nfa);
            WordnetVerbs.addTo(nfa);

            dfa = nfa.compile(DfaRun.UNMATCHED_COPY);
        } catch (CompileDfaException compileFailure) {
            throw new Error("impossible", compileFailure);
        } catch (ReSyntaxException syntaxFailure) {
            throw new Error("impossible", syntaxFailure);
        }
    }
}
