package org.apache.tika.parser.code;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.lang3.SystemProperties;
import org.apache.tika.detect.AutoDetectReader;
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractEncodingDetectorParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.codelibs.jhighlight.renderer.Renderer;
import org.codelibs.jhighlight.renderer.XhtmlRendererFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Parser for source code files.
 *
 * <p>The input is decoded with an {@link AutoDetectReader}, rendered to highlighted XHTML by a
 * JHighlight {@link Renderer}, re-parsed with jsoup and replayed as SAX events to the caller's
 * {@link ContentHandler}. While reading, the parser also records the number of lines
 * ({@code LoC}) and every {@code @author} tag it finds ({@link TikaCoreProperties#CREATOR}).
 *
 * <p>The supported media types are the keys of {@link #TYPES_TO_RENDERER}.
 */
public class SourceCodeParser extends AbstractEncodingDetectorParser {
    private static final long serialVersionUID = -4543476498190054160L;

    /** Matches an {@code @author} tag and captures the rest of the line (case-insensitive). */
    private static final Pattern AUTHORPATTERN = Pattern.compile("(?im)@author (.*) *$");

    /** Read-only mapping from supported media type to the JHighlight renderer name. */
    private static final Map<MediaType, String> TYPES_TO_RENDERER;

    static {
        Map<MediaType, String> types = new HashMap<>();
        types.put(MediaType.text("x-c++src"), "cpp");
        types.put(MediaType.text("x-java-source"), "java");
        types.put(MediaType.text("x-groovy"), "groovy");
        // Wrapped so that the key set handed out by getSupportedTypes cannot be used to
        // modify this shared, JVM-wide table.
        TYPES_TO_RENDERER = Collections.unmodifiableMap(types);
    }

    /**
     * Unchecked carrier used to tunnel a {@link SAXException} through the jsoup
     * {@link NodeFilter} callbacks, whose signatures do not allow checked exceptions.
     */
    private static class RuntimeSAXException extends RuntimeException {
        private final SAXException wrapped;

        private RuntimeSAXException(SAXException sAXException) {
            // Keep the original exception as the cause so the stack trace survives
            // even if this wrapper is never unwrapped.
            super(sAXException);
            this.wrapped = sAXException;
        }

        /** @return the {@link SAXException} this instance was created with */
        SAXException getWrapped() {
            return this.wrapped;
        }
    }

    /**
     * jsoup {@link NodeFilter} that replays a parsed document as SAX events on a
     * {@link ContentHandler}.
     *
     * <p>Nothing is forwarded until a node named {@code html} is entered, and forwarding stops
     * again when that node is left. Because the flag is flipped before the event is emitted in
     * both callbacks, the {@code html} start tag is forwarded but its end tag is not.
     * NOTE(review): presumably the receiving handler closes the document element itself -
     * confirm before changing this asymmetry.
     */
    private static class TikaNodeFilter implements NodeFilter {
        private final ContentHandler handler;
        private boolean ignore = true;

        private TikaNodeFilter(ContentHandler contentHandler) {
            this.handler = contentHandler;
        }

        /**
         * Emits {@code characters} for text and data nodes and {@code startElement} (with all
         * of the node's attributes) for every other node type.
         *
         * @throws RuntimeSAXException if the handler throws a {@link SAXException}
         */
        @Override
        public NodeFilter.FilterResult head(Node node, int depth) {
            if ("html".equals(node.nodeName())) {
                this.ignore = false;
            }
            if (this.ignore) {
                return NodeFilter.FilterResult.CONTINUE;
            }
            if (node instanceof TextNode) {
                emitCharacters(((TextNode) node).getWholeText());
                return NodeFilter.FilterResult.CONTINUE;
            }
            if (node instanceof DataNode) {
                emitCharacters(((DataNode) node).getWholeData());
                return NodeFilter.FilterResult.CONTINUE;
            }
            AttributesImpl attributes = new AttributesImpl();
            for (Attribute attribute : node.attributes()) {
                String key = attribute.getKey();
                attributes.addAttribute("", key, key, "", attribute.getValue());
            }
            try {
                this.handler.startElement("", node.nodeName(), node.nodeName(), attributes);
            } catch (SAXException e) {
                throw new RuntimeSAXException(e);
            }
            return NodeFilter.FilterResult.CONTINUE;
        }

        /**
         * Emits {@code endElement} for every node that is neither a text nor a data node.
         *
         * @throws RuntimeSAXException if the handler throws a {@link SAXException}
         */
        @Override
        public NodeFilter.FilterResult tail(Node node, int depth) {
            if ("html".equals(node.nodeName())) {
                this.ignore = true;
            }
            if (this.ignore || node instanceof TextNode || node instanceof DataNode) {
                return NodeFilter.FilterResult.CONTINUE;
            }
            try {
                this.handler.endElement("", node.nodeName(), node.nodeName());
            } catch (SAXException e) {
                throw new RuntimeSAXException(e);
            }
            return NodeFilter.FilterResult.CONTINUE;
        }

        /** Forwards {@code text} as a single characters event; null or empty text is skipped. */
        private void emitCharacters(String text) {
            if (text == null || text.isEmpty()) {
                return;
            }
            char[] chars = text.toCharArray();
            try {
                this.handler.characters(chars, 0, chars.length);
            } catch (SAXException e) {
                throw new RuntimeSAXException(e);
            }
        }
    }

    /** Creates a parser that uses the default encoding detector of the superclass. */
    public SourceCodeParser() {
    }

    /**
     * Creates a parser that uses the given encoding detector.
     *
     * @param encodingDetector detector passed on to the superclass
     */
    public SourceCodeParser(EncodingDetector encodingDetector) {
        super(encodingDetector);
    }

    /**
     * Returns the media types this parser can render.
     *
     * @param parseContext not used
     * @return a read-only view of the supported media types
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return TYPES_TO_RENDERER.keySet();
    }

    /**
     * Reads the source code from {@code inputStream}, highlights it and sends the resulting
     * XHTML to {@code contentHandler}.
     *
     * <p>Metadata written: {@code Content-Type} (normalised), {@code Content-Encoding} (the
     * detected charset), {@code LoC} (number of lines read) and one
     * {@link TikaCoreProperties#CREATOR} value per {@code @author} match. The caller's stream
     * is shielded and therefore not closed by this method.
     *
     * @param inputStream    source code to parse
     * @param contentHandler receiver of the XHTML SAX events
     * @param metadata       must already contain {@code Content-Type}; updated as described
     * @param parseContext   used to look up the encoding detector
     * @throws IOException   if reading the stream fails
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if the content type is missing, cannot be parsed, or has no
     *                       renderer
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        try (AutoDetectReader reader = new AutoDetectReader(CloseShieldInputStream.wrap(inputStream), metadata, getEncodingDetector(parseContext))) {
            Charset charset = reader.getCharset();
            String contentType = metadata.get("Content-Type");
            String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
            if (contentType == null) {
                throw new TikaException("media type must be set in metadata before parse");
            }
            MediaType mediaType = MediaType.parse(contentType);
            if (mediaType == null) {
                // MediaType.parse signals a malformed value with null; report it as a
                // TikaException instead of failing with a NullPointerException below.
                throw new TikaException("unparseable content type " + contentType);
            }
            metadata.set("Content-Type", mediaType.toString());
            metadata.set("Content-Encoding", charset.name());

            String lineSeparator = System.lineSeparator();
            StringBuilder source = new StringBuilder();
            int lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                source.append(line).append(lineSeparator);
                String author = parserAuthor(line);
                if (author != null) {
                    metadata.add(TikaCoreProperties.CREATOR, author);
                }
                lineCount++;
            }
            metadata.set("LoC", String.valueOf(lineCount));

            Renderer renderer = getRenderer(mediaType.toString());
            String html = renderer.highlight(resourceName, source.toString(), charset.name(), false);
            Document document = Jsoup.parse(html);
            document.quirksMode(Document.QuirksMode.quirks);

            XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
            xhtml.startDocument();
            try {
                NodeTraversor.filter(new TikaNodeFilter(xhtml), document);
            } catch (RuntimeSAXException e) {
                // Restore the checked exception that was tunnelled through the filter.
                throw e.getWrapped();
            } finally {
                xhtml.endDocument();
            }
        }
    }

    /**
     * Looks up the JHighlight renderer registered for a media type.
     *
     * @param str media type string
     * @return the renderer obtained from {@link XhtmlRendererFactory}
     * @throws TikaException if the media type has no entry in {@link #TYPES_TO_RENDERER}
     */
    private Renderer getRenderer(String str) throws TikaException {
        String rendererName = TYPES_TO_RENDERER.get(MediaType.parse(str));
        if (rendererName == null) {
            throw new TikaException("unparseable content type " + str);
        }
        return XhtmlRendererFactory.getRenderer(rendererName);
    }

    /**
     * Extracts the author named by an {@code @author} tag.
     *
     * @param str a single line of source code
     * @return the trimmed text following {@code @author}, or {@code null} if the line has no
     *         such tag
     */
    private String parserAuthor(String str) {
        Matcher matcher = AUTHORPATTERN.matcher(str);
        if (matcher.find()) {
            return matcher.group(1).trim();
        }
        return null;
    }
}
