package de.l3s.boilerpipe.sax;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.zip.GZIPInputStream;

/* loaded from: input_file:de/l3s/boilerpipe/sax/HTMLFetcher.class */
/**
 * Static helper that downloads the resource behind a {@link URL} and wraps the raw bytes,
 * together with the charset that should be used to decode them, in an {@link HTMLDocument}.
 *
 * <p>This class is not instantiable.
 */
public class HTMLFetcher {
    /** Charset used when the response declares no (usable) charset. */
    private static final String DEFAULT_CHARSET_NAME = "Cp1252";

    /** Prefix of the Content-Type parameter that carries the charset name. */
    private static final String CHARSET_PARAM = "charset=";

    private HTMLFetcher() {
    }

    /**
     * Fetches the content of the given URL.
     *
     * <p>The charset is taken from the {@code charset} parameter of the response's
     * Content-Type header; if it is absent, malformed or unsupported, {@code Cp1252} is used.
     * A {@code gzip} Content-Encoding is transparently decompressed; any other
     * Content-Encoding is reported on {@code System.err} and the body is passed through as-is.
     *
     * @param url the location to download
     * @return the downloaded bytes paired with the charset to decode them with
     * @throws IOException if connecting to the URL or reading the response fails
     */
    public static HTMLDocument fetch(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        Charset charset = charsetFromContentType(
                connection.getContentType(), Charset.forName(DEFAULT_CHARSET_NAME));

        InputStream in = connection.getInputStream();
        try {
            // Content-Encoding describes compression of the body, not its character set.
            String contentEncoding = connection.getContentEncoding();
            if (contentEncoding != null) {
                if ("gzip".equalsIgnoreCase(contentEncoding)) {
                    in = new GZIPInputStream(in);
                } else {
                    System.err.println("WARN: unsupported Content-Encoding: " + contentEncoding);
                }
            }

            ByteArrayOutputStream body = new ByteArrayOutputStream();
            byte[] buffer = new byte[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                body.write(buffer, 0, read);
            }
            return new HTMLDocument(body.toByteArray(), charset);
        } finally {
            // Close on every path, including a failed read or a failed gzip header check;
            // closing the GZIP wrapper also closes the underlying connection stream.
            in.close();
        }
    }

    /**
     * Extracts the charset named by the {@code charset} parameter of a Content-Type value.
     *
     * @param contentType the raw Content-Type header value, may be {@code null}
     * @param fallback the charset to return when none can be determined
     * @return the declared charset, or {@code fallback} if the header is {@code null}, has no
     *         charset parameter, or names a charset that is illegal or not supported
     */
    private static Charset charsetFromContentType(String contentType, Charset fallback) {
        if (contentType == null) {
            return fallback;
        }
        String[] parts = contentType.split(";");
        // parts[0] is the media type itself; parameters start at index 1.
        for (int i = 1; i < parts.length; i++) {
            String param = parts[i].trim();
            if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                continue;
            }
            String name = param.substring(CHARSET_PARAM.length()).trim();
            // The parameter value may be a quoted string, e.g. charset="utf-8".
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1).trim();
            }
            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException ignored) {
                // Covers both IllegalCharsetNameException and UnsupportedCharsetException:
                // a bogus header must not abort the fetch, so keep the default charset.
                return fallback;
            }
        }
        return fallback;
    }
}
