package monq.programs;

import com.ibm.wsdl.Constants;
import java.io.BufferedInputStream;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import monq.clifj.BooleanOption;
import monq.clifj.Commandline;
import monq.clifj.CommandlineException;
import monq.clifj.EnumOption;
import monq.clifj.LongOption;
import monq.clifj.Option;
import monq.ie.Term2Re;
import monq.jfa.AbstractFaAction;
import monq.jfa.CallbackException;
import monq.jfa.CompileDfaException;
import monq.jfa.Dfa;
import monq.jfa.DfaRun;
import monq.jfa.FaAction;
import monq.jfa.Nfa;
import monq.jfa.PrintfFormatter;
import monq.jfa.ReSyntaxException;
import monq.jfa.ReaderCharSource;
import monq.jfa.TextStore;
import monq.jfa.Xml;
import monq.jfa.actions.Copy;
import monq.jfa.actions.Drop;
import monq.jfa.actions.Fail;
import monq.jfa.actions.SwitchDfa;
import monq.jfa.ctx.Context;
import monq.jfa.ctx.ContextManager;
import monq.jfa.ctx.ContextStackProvider;
import monq.jfa.ctx.IfContext;
import monq.jfa.xml.StdCharEntities;
import monq.net.DfaRunService;
import monq.net.FilterServiceFactory;
import monq.net.Service;
import monq.net.ServiceCreateException;
import monq.net.ServiceFactory;
import monq.net.ServiceUnavailException;
import monq.net.TcpServer;
import monq.stuff.ConvinceGC;
import monq.stuff.EncodingDetector;
import monq.stuff.Sizeof;
import oracle.soap.server.internal.OracleServerConstants;
import org.apache.batik.dom.svg.SVGPathSegConstants;
import org.apache.batik.util.SVGConstants;
import org.apache.commons.cli.HelpFormatter;
import org.apache.log4j.Priority;

/* loaded from: input_file:lib/monq.jar:monq/programs/DictFilter.class */
public class DictFilter implements ServiceFactory {
    /** Compiled automaton assigned in {@code init}; returned by {@code getDfa} and wrapped by {@code createRun}. */
    private Dfa dictDfa;
    /** Encoding used to decode service input in {@code createService}; {@code null} means it is detected from the stream. */
    private String inputEncoding = null;
    /** Encoding of the {@code PrintStream} that {@code createService} wraps around the service output. */
    private String outputEncoding = EncodingDetector.defaultEnc;
    /**
     * Action for a {@code <template>} element of the dictionary file. The
     * element must carry no attributes; its content, with character
     * entities decoded, is parsed as a {@link PrintfFormatter} and stored
     * as the most recent template on the {@link ReadHelper}.
     */
    private static final FaAction do_template = new AbstractFaAction() { // from class: monq.programs.DictFilter.2
        Map m = new HashMap();

        @Override // monq.jfa.FaAction
        public void invoke(StringBuffer out, int start, DfaRun runner) throws CallbackException {
            ReadHelper state = (ReadHelper) runner.clientData;

            m.clear();
            Xml.splitElement(m, out, start);
            // Exactly two entries are expected for an attribute-free element.
            if (m.size() != 2) {
                throw new CallbackException("malformed template, attributes not allowed");
            }

            // Use the tail of the output buffer as scratch space for the
            // decoded template text.
            int scratch = out.length();
            out.append(m.get(">"));
            StdCharEntities.toChar(out, scratch);

            PrintfFormatter parsed;
            try {
                parsed = new PrintfFormatter(out, scratch);
            } catch (ReSyntaxException e) {
                out.setLength(scratch);
                throw new CallbackException("malformed template content (see cause)", e);
            }
            state.recentTemplate = parsed;
            // Remove both the matched element and the scratch text.
            out.setLength(start);
        }
    };
    /**
     * Action shared by the {@code <t>} and {@code <r>} elements of the
     * dictionary file. It collects the numbered attributes {@code p1},
     * {@code p2}, ... into a {@link TextStore}, derives a regular
     * expression from the element content and adds it to the dictionary
     * automaton of the {@link ReadHelper} together with a
     * {@link MwtCallback} bound to the most recent template.
     */
    private static final FaAction do_t_r = new AbstractFaAction() { // from class: monq.programs.DictFilter.1
        // Scratch map reused on every invocation to hold the split element.
        Map m = new HashMap();
        // Converter applied to the content of <t> elements (see convert(String)).
        DfaRun convert;

        {
            try {
                this.convert = Term2Re.createConverter(Term2Re.wordSplitRe, Term2Re.wordSepRe, Term2Re.trailContextRe);
            } catch (ReSyntaxException e) {
                throw new Error("impossible", e);
            }
        }

        /** Runs the Term2Re converter over {@code str}; an IOException is rethrown as an Error. */
        private String convert(String str) {
            try {
                return this.convert.filter(str);
            } catch (IOException e) {
                throw new Error("impossible", e);
            }
        }

        @Override // monq.jfa.FaAction
        public void invoke(StringBuffer stringBuffer, int i, DfaRun dfaRun) throws CallbackException {
            int parseInt;
            ReadHelper readHelper = (ReadHelper) dfaRun.clientData;
            if (readHelper.recentTemplate == null) {
                throw new CallbackException("no <template> yet");
            }
            // The character following the opening '<' tells <t> apart from <r>.
            boolean z = stringBuffer.charAt(i + 1) == 't';
            this.m.clear();
            Xml.splitElement(this.m, stringBuffer, i);
            // Everything appended beyond this position is scratch space.
            int length = stringBuffer.length();
            TextStore textStore = new TextStore();
            // Part 0 starts out empty; MwtCallback.invoke re-points it with setPart(0, ...).
            textStore.appendPart(stringBuffer, 0, 0);
            int i2 = 1;
            // The attribute key "p<N>" is built in the buffer itself: the 'p'
            // stays at position `length`, the number after it is replaced
            // on every round.
            stringBuffer.append('p');
            while (true) {
                int i3 = i2;
                i2++;
                stringBuffer.append(i3);
                String substring = stringBuffer.substring(length);
                stringBuffer.setLength(length + 1);
                Object remove = this.m.remove(substring);
                if (remove == null) {
                    // First missing p<N> ends the numbered attributes.
                    break;
                }
                // Decode character entities of the attribute value in place
                // and store the result as the next part of the text store.
                stringBuffer.append(remove);
                StdCharEntities stdCharEntities = readHelper.helper;
                StdCharEntities.toChar(stringBuffer, length + 1);
                textStore.appendPart(stringBuffer, length + 1, stringBuffer.length());
                stringBuffer.setLength(length + 1);
            }
            // Drop the scratch 'p' again.
            stringBuffer.setLength(length);
            StdCharEntities stdCharEntities2 = readHelper.helper;
            // The ">" entry is the same one do_template appends as the template text.
            String str = StdCharEntities.toChar((String) this.m.remove(">"));
            if (z) {
                // <t>: the content is passed through the Term2Re converter
                // and the value handed to MwtCallback as tc is fixed at 1.
                str = convert(str);
                parseInt = 1;
            } else {
                // <r>: the content is used as is; the optional "tc"
                // attribute defaults to 0 and must be a non-negative integer.
                String str2 = (String) this.m.remove("tc");
                if (str2 == null) {
                    str2 = "0";
                }
                try {
                    parseInt = Integer.parseInt(str2);
                    if (parseInt < 0) {
                        throw new CallbackException("found negative tc attribute");
                    }
                } catch (NumberFormatException e) {
                    throw new CallbackException("found tc attribute which is not a number", e);
                }
            }
            // Whatever is left after removing the "<" entry is an attribute
            // this action does not know.
            this.m.remove("<");
            if (this.m.size() > 0) {
                StringBuffer stringBuffer2 = new StringBuffer();
                stringBuffer2.append("superfluous attributes:");
                for (Object obj : this.m.keySet()) {
                    stringBuffer2.append(' ').append(obj).append('=').append(this.m.get(obj));
                }
                throw new CallbackException(stringBuffer2.toString());
            }
            if (readHelper.verbose) {
                System.err.println(new StringBuffer().append(">>").append(str).append("<<").toString());
            }
            try {
                // Each entry gets the next value of the ReadHelper's priority counter.
                readHelper.dict.or(str, new MwtCallback(textStore, readHelper.recentTemplate, parseInt, ReadHelper.access$308(readHelper)));
                // Remove the matched element from the output buffer.
                stringBuffer.setLength(i);
            } catch (ReSyntaxException e2) {
                throw new CallbackException("regular expression syntax error (see cause)", e2);
            }
        }
    };

    /* loaded from: input_file:lib/monq.jar:monq/programs/DictFilter$MwtCallback.class */
    /**
     * Action attached to every dictionary entry. On a match it hands the
     * matched text (as part 0) and the entry's stored parts to the
     * template formatter and replaces the match in the output buffer with
     * the formatted result.
     */
    private static class MwtCallback extends AbstractFaAction {
        /** Parts collected from the dictionary entry; part 0 is re-pointed at the match on every call. */
        TextStore store;
        /** Formatter built from the template that was current when the entry was read. */
        PrintfFormatter f;
        /** Number of characters at the end of the match that are pushed back and not treated as part of it. */
        int tc;

        /**
         * @param textStore parts of the dictionary entry, with a placeholder as part 0
         * @param printfFormatter formatter producing the replacement text
         * @param i number of trailing characters of a match to push back
         * @param i2 priority of this action
         */
        public MwtCallback(TextStore textStore, PrintfFormatter printfFormatter, int i, int i2) {
            this.store = textStore;
            this.f = printfFormatter;
            this.tc = i;
            this.priority = i2;
        }

        /**
         * Replaces the match starting at {@code i} in {@code stringBuffer}
         * with the formatted output.
         *
         * @throws CallbackException if thrown by the formatter
         */
        @Override // monq.jfa.FaAction
        public void invoke(StringBuffer stringBuffer, int i, DfaRun dfaRun) throws CallbackException {
            int matchEnd = stringBuffer.length();
            // Push the trailing tc characters back, but only when at least
            // one character of the match remains afterwards.
            if (this.tc > 0 && this.tc < matchEnd - i) {
                matchEnd -= this.tc;
                dfaRun.unskip(stringBuffer, matchEnd);
            }
            // The store belongs to this action object, so calls are serialized on it.
            synchronized (this.store) {
                final int mark = this.store.length();
                try {
                    // Temporarily append the match and expose it as part 0.
                    this.store.append(stringBuffer, i, matchEnd);
                    this.store.setPart(0, mark, mark + (matchEnd - i));
                    stringBuffer.setLength(i);
                    this.f.format(stringBuffer, this.store, null);
                } finally {
                    // Always remove the temporary match text, also when the
                    // formatter fails; otherwise the store would keep
                    // growing across failed invocations.
                    this.store.setLength(mark);
                }
            }
        }

        /** Returns the superclass description followed by part 0 of the store and the formatter. */
        public String toString() {
            StringBuffer stringBuffer = new StringBuffer(80);
            stringBuffer.append(super.toString()).append("[store=`");
            this.store.getPart(stringBuffer, 0);
            stringBuffer.append("', formatter=`").append(this.f);
            stringBuffer.append("']");
            return stringBuffer.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:lib/monq.jar:monq/programs/DictFilter$ReadHelper.class */
    /**
     * Mutable state shared by the actions that read the dictionary file:
     * the context stack, the template seen last, the dictionary automaton
     * under construction and the priority counter for its entries.
     */
    public static final class ReadHelper implements ContextStackProvider {
        private PrintfFormatter recentTemplate;
        private Nfa dict;
        private boolean verbose;
        private List stack = new ArrayList();
        private int nextPrio = 1;
        StdCharEntities helper = new StdCharEntities();

        /**
         * @param z value of the verbose flag checked by the dictionary actions
         */
        public ReadHelper(boolean z) {
            this.verbose = z;
        }

        /** Returns the context stack kept by this helper. */
        @Override // monq.jfa.ctx.ContextStackProvider
        public List getStack() {
            return stack;
        }

        /** Returns the current priority counter of {@code readHelper} and advances it by one. */
        static int access$308(ReadHelper readHelper) {
            return readHelper.nextPrio++;
        }
    }

    /**
     * Sets the encoding used to decode the input of services created by
     * this factory. Passing {@code null} switches back to detecting the
     * encoding from the input stream.
     *
     * @param str charset name, or {@code null} for auto-detection
     * @throws UnsupportedEncodingException if {@code str} is not {@code null}
     *         and does not name a charset supported by this JVM; the
     *         current setting is left unchanged in that case
     */
    public void setInputEncoding(String str) throws UnsupportedEncodingException {
        if (str != null) {
            boolean supported;
            try {
                supported = Charset.isSupported(str);
            } catch (IllegalArgumentException e) {
                // Syntactically illegal charset name.
                supported = false;
            }
            if (!supported) {
                throw new UnsupportedEncodingException(str);
            }
        }
        this.inputEncoding = str;
    }

    /**
     * Sets the encoding used to encode the output of services created by
     * this factory.
     *
     * @param str charset name; a {@code null} value is stored without validation
     * @throws UnsupportedEncodingException if {@code str} is not {@code null}
     *         and does not name a charset supported by this JVM; the
     *         current setting is left unchanged in that case
     */
    public void setOutputEncoding(String str) throws UnsupportedEncodingException {
        if (str != null) {
            boolean supported;
            try {
                supported = Charset.isSupported(str);
            } catch (IllegalArgumentException e) {
                // Syntactically illegal charset name.
                supported = false;
            }
            if (!supported) {
                throw new UnsupportedEncodingException(str);
            }
        }
        this.outputEncoding = str;
    }

    /**
     * Creates a filter from a dictionary read from a byte stream whose
     * encoding is detected from its content. No memory debugging is
     * requested and the catch-all word is added.
     *
     * @param inputStream dictionary in mwt format; wrapped in a
     *        {@link BufferedInputStream} when it does not support
     *        mark/reset, as {@code createService} does before detecting
     *        an encoding
     * @param str input type: {@code raw}, {@code xml} or {@code elem}
     * @param str2 element name used when {@code str} is {@code elem}
     * @param z verbose flag
     */
    public DictFilter(InputStream inputStream, String str, String str2, boolean z) throws IOException, ReSyntaxException, CompileDfaException {
        InputStream markable = inputStream.markSupported() ? inputStream : new BufferedInputStream(inputStream);
        init(new InputStreamReader(markable, EncodingDetector.detect(markable)), str, str2, z, false, true);
    }

    public DictFilter(Reader reader, String str, String str2, boolean z) throws IOException, ReSyntaxException, CompileDfaException {
        init(reader, str, str2, z, false, true);
    }

    /**
     * Creates a filter from a dictionary supplied as characters, with all
     * switches given explicitly.
     *
     * @param reader dictionary in mwt format
     * @param str input type: {@code raw}, {@code xml} or {@code elem}
     * @param str2 element name used when {@code str} is {@code elem}
     * @param z verbose flag
     * @param z2 when set, automaton sizes are written to standard error
     * @param z3 when set, the catch-all word is added to the dictionary
     */
    public DictFilter(Reader reader, String str, String str2, boolean z, boolean z2, boolean z3) throws IOException, ReSyntaxException, CompileDfaException {
        this.init(reader, str, str2, z, z2, z3);
    }

    /**
     * Reads the dictionary from {@code reader} and compiles the automaton
     * stored in {@code dictDfa}.
     *
     * @param reader dictionary in mwt format
     * @param str input type; must be {@code raw}, {@code xml} or {@code elem}
     * @param str2 element name used when {@code str} is {@code elem}
     * @param z verbose flag passed on to the {@link ReadHelper}
     * @param z2 when set, sizes of the Nfa and Dfa are printed to standard
     *        error and the ConvinceGC helper logs there as well
     * @param z3 when set, the dictionary starts out with the catch-all word
     *        {@code [A-Za-z0-9]+}; otherwise it starts out empty
     * @throws IllegalArgumentException if {@code str} is none of the three
     *         supported input types
     */
    private void init(Reader reader, String str, String str2, boolean z, boolean z2, boolean z3) throws IOException, ReSyntaxException, CompileDfaException {
        ReadHelper readHelper = new ReadHelper(z);
        // NOTE(review): this try spans the whole method body, so every
        // CompileDfaException/ReSyntaxException raised below is rethrown
        // as an Error although the signature declares both -- confirm
        // that this is intended.
        try {
            // Automaton that parses the dictionary file itself: `template',
            // `t' and `r' are only accepted inside an `mwt' context, white
            // space and comments are dropped.
            Nfa nfa = new Nfa(Nfa.NOTHING);
            Context addXml = new ContextManager(nfa).setDefaultAction(Drop.DROP).setDefaultFMB(DfaRun.UNMATCHED_THROW).addXml((Context) null, "mwt");
            // NOTE(review): the two SVG constants presumably hold "t" and
            // "r", matching the Fail messages next to them -- confirm.
            nfa.or(Xml.S, Drop.DROP).or(Xml.XMLDecl, new IfContext(null, Drop.DROP).elsedo(new Fail("XML declaration in the wrong place"))).or(Xml.GoofedElement("template"), new IfContext(addXml, do_template).elsedo(new Fail("`template' must be child of `mwt'"))).or(Xml.GoofedElement(SVGPathSegConstants.PATHSEG_CURVETO_QUADRATIC_SMOOTH_REL_LETTER), new IfContext(addXml, do_t_r).elsedo(new Fail("`t' must be child of `mwt'"))).or(Xml.GoofedElement(SVGConstants.SVG_R_ATTRIBUTE), new IfContext(addXml, do_t_r).elsedo(new Fail("`r' must be child of `mwt'"))).or(Xml.Comment, Drop.DROP);
            // The second argument to compile is an action that reports the
            // innermost context still on the stack.
            DfaRun dfaRun = new DfaRun(nfa.compile(DfaRun.UNMATCHED_THROW, new IfContext(null, Drop.DROP).elsedo(new AbstractFaAction(this) { // from class: monq.programs.DictFilter.3
                private final DictFilter this$0;

                {
                    this.this$0 = this;
                }

                @Override // monq.jfa.FaAction
                public void invoke(StringBuffer stringBuffer, int i, DfaRun dfaRun2) throws CallbackException {
                    List stack = ((ReadHelper) dfaRun2.clientData).getStack();
                    throw new CallbackException(new StringBuffer().append("open context `").append(((Context) stack.get(stack.size() - 1)).getName()).append("'").toString());
                }
            })), new ReaderCharSource(reader));
            if (z3) {
                // Catch-all word, copied unchanged with priority Priority.ALL_INT.
                readHelper.dict = new Nfa("[A-Za-z0-9]+", new Copy(Priority.ALL_INT));
            } else {
                readHelper.dict = new Nfa(Nfa.NOTHING);
            }
            dfaRun.clientData = readHelper;
            // Run the reader automaton over the dictionary; do_template and
            // do_t_r fill readHelper.dict as a side effect.
            dfaRun.filter(System.out);
            Nfa nfa2 = readHelper.dict;
            if ("raw".equals(str)) {
                this.dictDfa = nfa2.compile(DfaRun.UNMATCHED_COPY);
            } else if ("xml".equals(str)) {
                // Additionally copy tags, references, processing
                // instructions and comments through unchanged.
                // NOTE(review): DMS_NOUN_SEPARATOR presumably holds the
                // regex alternation "|" -- confirm.
                nfa2.or(new StringBuffer().append(Xml.STag()).append(OracleServerConstants.DMS_NOUN_SEPARATOR).append(Xml.ETag()).append(OracleServerConstants.DMS_NOUN_SEPARATOR).append(Xml.EmptyElemTag()).append(OracleServerConstants.DMS_NOUN_SEPARATOR).append(Xml.Reference).toString(), Copy.COPY).or("<[?](.*[?]>)!", Copy.COPY).or("<[!]--(.*-->)!", Copy.COPY);
                this.dictDfa = nfa2.compile(DfaRun.UNMATCHED_COPY);
            } else {
                if (!"elem".equals(str)) {
                    throw new IllegalArgumentException(new StringBuffer().append("`").append(str).append("' is not a valid input type").toString());
                }
                // Two automata that switch to each other: `compile' only
                // looks for the start tag of str2 and switches to the
                // dictionary automaton, which switches back on the end tag.
                nfa2.or(Xml.Reference, Copy.COPY);
                SwitchDfa switchDfa = new SwitchDfa(Copy.COPY);
                switchDfa.setPriority(Integer.MAX_VALUE);
                SwitchDfa switchDfa2 = new SwitchDfa(Copy.COPY);
                Dfa compile = new Nfa(Xml.STag(str2), switchDfa2).compile(DfaRun.UNMATCHED_COPY);
                nfa2.or(Xml.ETag(str2), switchDfa);
                this.dictDfa = nfa2.compile(DfaRun.UNMATCHED_COPY);
                switchDfa.setDfa(compile);
                switchDfa2.setDfa(this.dictDfa);
                // The start-tag automaton is the one exposed to callers.
                this.dictDfa = compile;
            }
            if (z2) {
                System.err.println("# Size of Nfa");
                Sizeof.printTypes(System.err, Sizeof.sizeof(nfa2));
                Hashtable sizeof = Sizeof.sizeof(this.dictDfa);
                System.err.println("# Size of Dfa");
                Sizeof.printTypes(System.err, sizeof);
            }
            // Run a ConvinceGC instance on a daemon thread.
            ConvinceGC convinceGC = new ConvinceGC(10);
            if (z2) {
                convinceGC.setLogging(System.err);
            }
            Thread thread = new Thread(convinceGC);
            thread.setDaemon(true);
            thread.start();
        } catch (CompileDfaException e) {
            throw new Error("this cannot happen", e);
        } catch (ReSyntaxException e2) {
            throw new Error("this cannot happen", e2);
        }
    }

    /**
     * Creates a new {@link DfaRun} operating on the compiled dictionary
     * automaton. The run has no input source set.
     *
     * @return a fresh run for {@code dictDfa}
     */
    public DfaRun createRun() {
        final DfaRun run = new DfaRun(dictDfa);
        return run;
    }

    /**
     * Returns the automaton compiled from the dictionary.
     *
     * @return the compiled {@link Dfa} held by this filter
     */
    public Dfa getDfa() {
        return dictDfa;
    }

    /**
     * Creates a service that filters {@code inputStream} through the
     * dictionary automaton and writes the result to {@code outputStream}.
     * When no input encoding has been set, it is detected from the stream,
     * which is wrapped in a {@link BufferedInputStream} if it does not
     * support mark/reset.
     *
     * @param inputStream source of the text to filter
     * @param outputStream destination of the filtered text
     * @param obj not used
     * @throws ServiceCreateException if a configured encoding is not
     *         supported, or (as {@code ServiceUnavailException}) if the
     *         encoding cannot be detected
     */
    @Override // monq.net.ServiceFactory
    public Service createService(InputStream inputStream, OutputStream outputStream, Object obj) throws ServiceCreateException {
        DfaRun run = createRun();
        InputStreamReader source;

        if (this.inputEncoding != null) {
            // Explicitly configured encoding.
            try {
                source = new InputStreamReader(inputStream, this.inputEncoding);
            } catch (UnsupportedEncodingException e) {
                throw new ServiceCreateException("non-existant input encoding set in DictFilter", e);
            }
        } else {
            // Detect the encoding from the stream, falling back to UTF-8.
            InputStream markable = inputStream.markSupported() ? inputStream : new BufferedInputStream(inputStream);
            String detected = null;
            try {
                detected = EncodingDetector.detect(markable, 1000, "UTF-8");
                source = new InputStreamReader(markable, detected);
            } catch (UnsupportedEncodingException e) {
                throw new ServiceUnavailException("unsupported encoding `" + detected + "' found in file", e);
            } catch (IOException e) {
                throw new ServiceUnavailException("problems reading encoding", e);
            }
        }

        run.setIn(new ReaderCharSource(source));
        try {
            PrintStream sink = new PrintStream(outputStream, true, this.outputEncoding);
            return new DfaRunService(run, sink);
        } catch (UnsupportedEncodingException e) {
            throw new ServiceCreateException("non-existant output encoding specified in DictFilter", e);
        }
    }

    /**
     * Command line entry point. Reads a dictionary in mwt format and then
     * either stores the compiled automaton in a file ({@code -c}), runs as
     * a TCP server ({@code -p}) or filters standard input to standard
     * output.
     *
     * @param strArr command line arguments
     * @throws IOException if the dictionary cannot be read or the compiled
     *         automaton cannot be written
     */
    public static void main(String[] strArr) throws IOException, CompileDfaException, ReSyntaxException {
        String property = System.getProperty("argv0", "DictFilter");
        Commandline commandline = new Commandline(property, "filter and tag text according to a dictionary (mwt file)", "filter", "dictionary file in mwt-format", 1, 1);
        commandline.addOption(new BooleanOption("-v", "write all generated regular expressions to standard error or the logfile"));
        commandline.addOption(new EnumOption("-t", "type", "type of input: raw=plain ascii, xml=recognize and skip xml tags, elem=tag only within xml element given with option -e", 1, 1, "|raw|xml|elem", null).required());
        commandline.addOption(new Option("-e", "elem", "specifies xml element within which to work, when '-t elem' is specified", 1, 1, new String[]{"plain"}));
        // Plain literal instead of an unrelated library constant of the same value.
        commandline.addOption(new LongOption("-p", "port", "run as a server on given port instead of filtering stdin->stdout", 1, 1, 0L, 65535L, null));
        commandline.addOption(new Option("-c", "fname", "store the compiled DFA in file fname and exit", 1, 1, null));
        commandline.addOption(new BooleanOption("-caw", "suppress additon of a catch-all word to the automaton that prevents against matching within words. You then could add the catch-all to the mwt file."));
        commandline.addOption(new Option("-ie", "inEnc", "encoding used for input stream, guessed from input if not specified and then defaults to the platform encoding", 1, 1, null));
        commandline.addOption(new Option("-oe", "outEnc", "encoding used for output stream, defaults to the platform encoding", 1, 1, null));
        commandline.addOption(new BooleanOption("-dm", "debug memory: write memory of Dfa and Nfa to stderr"));
        try {
            commandline.parse(strArr);
        } catch (CommandlineException e) {
            System.err.println(e.getMessage());
            System.exit(1);
        }

        // encodings[0] is the input encoding, encodings[1] the output
        // encoding; null means "not given on the command line".
        String[] encodings = new String[2];
        String[] encodingOptions = {"-ie", "-oe"};
        for (int i = 0; i < encodingOptions.length; i++) {
            if (commandline.available(encodingOptions[i])) {
                String charsetName = commandline.getStringValue(encodingOptions[i], null);
                encodings[i] = charsetName;
                try {
                    Charset.forName(charsetName);
                } catch (IllegalArgumentException e2) {
                    // Covers UnsupportedCharsetException as well as
                    // IllegalCharsetNameException, so a malformed name gets
                    // the same message instead of a stack trace.
                    System.err.println(new StringBuffer().append(property).append(": character set `").append(charsetName).append("' not supported").toString());
                    System.exit(1);
                }
            }
        }

        boolean verbose = commandline.available("-v");
        boolean debugMemory = commandline.available("-dm");
        boolean addCatchAll = !commandline.available("-caw");
        // "--" addresses the non-option argument, i.e. the dictionary file.
        String dictFile = (String) commandline.getValue("--");
        String inputType = (String) commandline.getValue("-t");
        String element = (String) commandline.getValue("-e");

        DictFilter dictFilter;
        BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(dictFile));
        try {
            dictFilter = new DictFilter(new InputStreamReader(bufferedInputStream, EncodingDetector.detect(bufferedInputStream)), inputType, element, verbose, debugMemory, addCatchAll);
        } finally {
            // Close the dictionary file even when reading it fails.
            bufferedInputStream.close();
        }
        if (encodings[0] != null) {
            dictFilter.setInputEncoding(encodings[0]);
        }
        if (encodings[1] != null) {
            dictFilter.setOutputEncoding(encodings[1]);
        }

        if (commandline.available("-c")) {
            String dfaFile = commandline.getStringValue("-c", null);
            FileOutputStream fileOut = new FileOutputStream(dfaFile);
            try {
                ObjectOutputStream objectOutputStream = new ObjectOutputStream(fileOut);
                if (verbose) {
                    System.err.println(new StringBuffer().append("Writing DFA to `").append(dfaFile).append("'").toString());
                }
                objectOutputStream.writeObject(DfaRun.UNMATCHED_COPY);
                objectOutputStream.writeObject(dictFilter.getDfa());
                objectOutputStream.close();
            } finally {
                // Releases the file also when serialization fails; closing
                // an already closed stream has no effect.
                fileOut.close();
            }
            // Must stay outside the try: System.exit would skip the finally.
            System.exit(0);
        }

        if (commandline.available("-p")) {
            new TcpServer(((Long) commandline.getValue("-p")).intValue(), new FilterServiceFactory(dictFilter), 20).setLogging(System.out).serve();
        } else {
            dictFilter.createService(System.in, new FileOutputStream(FileDescriptor.out), null).run();
        }
    }
}
