/*
 * Decompiled with CFR 0.152.
 */
package org.archive.extract;

import com.google.common.io.CountingOutputStream;
import com.google.common.io.NullOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.extract.ExtractorOutput;
import org.archive.format.gzip.GZIPFormatException;
import org.archive.format.json.JSONUtils;
import org.archive.format.json.SimpleJSONPathSpec;
import org.archive.resource.MetaData;
import org.archive.resource.Resource;
import org.archive.url.URLKeyMaker;
import org.archive.url.WaybackURLKeyMaker;
import org.archive.util.IAUtils;
import org.archive.util.StreamCopy;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * {@link ExtractorOutput} that prints one space-separated CDX line per
 * {@link Resource}.
 *
 * <p>The constructor writes the header line {@code " CDX N b a m s k r M S V g"};
 * every call to {@link #output(Resource)} then writes, in this order: the
 * canonicalized URL key, the capture date, the original URL, the MIME type, the
 * HTTP status, the digest, the redirect target, the robots flags, the compressed
 * length, the container offset and the container filename. Columns that could not
 * be determined for a record are written as {@code "-"}; columns that no branch of
 * the record-type dispatch assigns keep the placeholder {@code "TBD"}.
 */
public class RealCDXExtractorOutput implements ExtractorOutput {
    private static final Logger LOG = Logger.getLogger(RealCDXExtractorOutput.class.getName());

    /** Name of the HTTP response header that carries robots directives. */
    public static final String X_ROBOTS_HTTP_HEADER = "X-Robots-Tag";

    /** Value written for a column that is known to be absent. */
    private static final String NONE = "-";
    /** Placeholder left in a column that no record-type branch assigned. */
    private static final String UNSET = "TBD";

    /** JSON path of the HTTP response headers inside the record metadata. */
    private static final String HTTP_HEADERS_PATH = "Envelope.Payload-Metadata.HTTP-Response-Metadata.Headers";
    /** JSON path of the {@code <meta>} tags extracted from the HTML head. */
    private static final String HTML_METAS_PATH = "Envelope.Payload-Metadata.HTTP-Response-Metadata.HTML-Metadata.Head.Metas";

    /** Matches a meta-refresh {@code content} value of the form {@code "<seconds>; url=<target>"}. */
    private static final Pattern REFRESH_URL_PATTERN = Pattern.compile(
            "^\\d+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    // Robots directives, compared after hyphens are removed and the text is upper-cased.
    private static final String NO_NOTHIN_MATCH = "NONE";
    private static final String NO_FOLLOW_MATCH = "NOFOLLOW";
    private static final String NO_INDEX_MATCH = "NOINDEX";
    private static final String NO_ARCHIVE_MATCH = "NOARCHIVE";

    private final PrintWriter out;
    private final URLKeyMaker keyMaker;
    /** When true, the full record metadata is appended to each line as a twelfth column. */
    private boolean dumpJSON = false;

    // Path specs for the individual metadata values. They stay package-private and
    // non-final because other classes in this package may rely on that visibility.
    SimpleJSONPathSpec filenameSpec = new SimpleJSONPathSpec("Container.Filename");
    SimpleJSONPathSpec offsetSpec = new SimpleJSONPathSpec("Container.Offset");
    SimpleJSONPathSpec gzDeflateLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Deflate-Length");
    SimpleJSONPathSpec formatSpec = new SimpleJSONPathSpec("Envelope.Format");
    SimpleJSONPathSpec arcURL = new SimpleJSONPathSpec("Envelope.ARC-Header-Metadata.Target-URI");
    SimpleJSONPathSpec arcDate = new SimpleJSONPathSpec("Envelope.ARC-Header-Metadata.Date");
    SimpleJSONPathSpec arcContentType = new SimpleJSONPathSpec("Envelope.ARC-Header-Metadata.Content-Type");
    SimpleJSONPathSpec warcURL = new SimpleJSONPathSpec("Envelope.WARC-Header-Metadata.WARC-Target-URI");
    SimpleJSONPathSpec warcDate = new SimpleJSONPathSpec("Envelope.WARC-Header-Metadata.WARC-Date");
    SimpleJSONPathSpec warcType = new SimpleJSONPathSpec("Envelope.WARC-Header-Metadata.WARC-Type");
    SimpleJSONPathSpec warcContentType = new SimpleJSONPathSpec("Envelope.WARC-Header-Metadata.Content-Type");
    SimpleJSONPathSpec envBlockDigest = new SimpleJSONPathSpec("Envelope.Block-Digest");
    SimpleJSONPathSpec warcPayloadDigest = new SimpleJSONPathSpec("Envelope.WARC-Header-Metadata.WARC-Payload-Digest");
    SimpleJSONPathSpec httpResponseCode = new SimpleJSONPathSpec("Envelope.Payload-Metadata.HTTP-Response-Metadata.Response-Message.Status");
    SimpleJSONPathSpec httpEntityDigest = new SimpleJSONPathSpec("Envelope.Payload-Metadata.HTTP-Response-Metadata.Entity-Digest");
    SimpleJSONPathSpec HTTPLocation = new SimpleJSONPathSpec(HTTP_HEADERS_PATH);

    /**
     * Creates an output that writes to {@code out} and immediately emits the CDX
     * header line.
     *
     * @param out      destination for the header and all CDX lines
     * @param keyMaker produces the canonicalized URL key (first column)
     */
    public RealCDXExtractorOutput(PrintWriter out, URLKeyMaker keyMaker) {
        this.out = out;
        this.keyMaker = keyMaker;
        out.println(" CDX N b a m s k r M S V g");
        out.flush();
    }

    /**
     * Creates an output that canonicalizes URLs with a {@link WaybackURLKeyMaker}.
     *
     * @param out destination for the header and all CDX lines
     */
    public RealCDXExtractorOutput(PrintWriter out) {
        this(out, new WaybackURLKeyMaker());
    }

    /**
     * Drains the resource's input stream and writes one CDX line built from its
     * top-level metadata.
     *
     * <p>A record whose stream raises a {@link GZIPFormatException} while being
     * drained is logged and skipped without writing a line.
     *
     * @param resource the record to describe
     * @throws IOException if draining fails for another reason, or wrapping a
     *         {@link JSONException} / {@link URISyntaxException} raised while
     *         building the line
     */
    @Override
    public void output(Resource resource) throws IOException {
        // Read the stream to its end, discarding the data but counting it.
        CountingOutputStream drained = new CountingOutputStream((OutputStream) new NullOutputStream());
        try {
            StreamCopy.copy(resource.getInputStream(), (OutputStream) drained);
        } catch (GZIPFormatException e) {
            LOG.log(Level.WARNING, "Skipping record: gzip format error while draining the resource stream", e);
            return;
        }
        long bytes = drained.getCount();
        if (bytes > 0L) {
            LOG.info(bytes + " unconsumed bytes in Resource InputStream.");
        }

        try {
            MetaData m = resource.getMetaData().getTopMetaData();
            String filename = getContainerFilename(m);
            String offset = getContainerOffset(m);
            String gzLen = getGZLength(m);
            String format = getEnvelopeFormat(m);

            String origUrl = UNSET;
            String date = UNSET;
            RecordFields fields = new RecordFields();

            if (format.equals("WARC")) {
                origUrl = getWARCURL(m);
                date = getWARCDate(m);
                String type = getWARCType(m);
                if (type.equals("response")) {
                    String recType = getWARCContentType(m);
                    if (recType.equals("text/dns")) {
                        fields.setNonHttp(recType, getEnvelopeBlockDigest(m));
                    } else if (recType.equals("application/http; msgtype=response")) {
                        fillHttpFields(fields, origUrl, m);
                    }
                } else if (type.equals("warcinfo")) {
                    origUrl = containerUrl("warcinfo:/", filename);
                    fields.setNonHttp("warc-info", getEnvelopeBlockDigest(m));
                } else if (type.equals("request")) {
                    fields.setNonHttp("warc/request", getEnvelopeBlockDigest(m));
                } else if (type.equals("metadata")) {
                    fields.setNonHttp("warc/metadata", getEnvelopeBlockDigest(m));
                } else if (type.equals("revisit")) {
                    // Revisit records are keyed by the payload digest, not the block digest.
                    fields.setNonHttp("warc/revisit", getWARCPayloadDigest(m));
                }
            } else if (format.equals("ARC")) {
                origUrl = getARCURL(m);
                date = getARCDate(m);
                if (origUrl.startsWith("filedesc:")) {
                    origUrl = containerUrl("filedesc:/", filename);
                    fields.setNonHttp("arc-filedesc", getEnvelopeBlockDigest(m));
                } else {
                    String arcMime = getARCContentType(m);
                    if (arcMime.equals("alexa/dat")) {
                        fields.setNonHttp(arcMime, getEnvelopeBlockDigest(m));
                    } else {
                        fillHttpFields(fields, origUrl, m);
                    }
                }
            }

            String redir = fields.redir;
            if (!redir.equals(NONE)) {
                redir = resolve(origUrl, redir);
            }
            String canUrl = this.keyMaker.makeKey(origUrl);
            if (this.dumpJSON) {
                this.out.format("%s %s %s %s %s %s %s %s %s %s %s %s\n", canUrl, date, origUrl, fields.mime, fields.httpCode, fields.digest, redir, fields.meta, gzLen, offset, filename, m.toString(1));
            } else {
                this.out.format("%s %s %s %s %s %s %s %s %s %s %s\n", canUrl, date, origUrl, fields.mime, fields.httpCode, fields.digest, redir, fields.meta, gzLen, offset, filename);
            }
        } catch (JSONException e) {
            throw new IOException(e);
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
        this.out.flush();
    }

    /** Mutable holder for the CDX columns whose values depend on the record type. */
    private static final class RecordFields {
        String mime = UNSET;
        String httpCode = UNSET;
        String digest = UNSET;
        String redir = UNSET;
        String meta = UNSET;

        /** Fills the columns for a record that carries no HTTP response. */
        void setNonHttp(String mimeValue, String digestValue) {
            this.mime = mimeValue;
            this.digest = digestValue;
            this.httpCode = NONE;
            this.redir = NONE;
            this.meta = NONE;
        }
    }

    /** Builds the synthetic URL used for container-level records (warcinfo / filedesc). */
    private String containerUrl(String prefix, String filename) {
        return prefix + filename + "/" + IAUtils.COMMONS_VERSION.replace(" ", "_");
    }

    /**
     * Fills the record-dependent columns from an HTTP response: status, entity
     * digest, MIME type, redirect target and robots flags. For HTML responses a
     * missing redirect or robots value is looked up in the HTML {@code <meta>} tags.
     */
    private void fillHttpFields(RecordFields fields, String origUrl, MetaData m) throws JSONException {
        fields.httpCode = getHTTPStatus(m);
        fields.digest = getHTTPEntityDigest(m);
        JSONObject headers = JSONUtils.extractObject(m, HTTP_HEADERS_PATH);
        fields.mime = normalizeHTTPMime(scanHeadersLC(headers, "content-type", "unk"));
        fields.redir = scanHeadersLC(headers, "location", NONE);
        String robotsHeader = scanHeadersLC(headers, X_ROBOTS_HTTP_HEADER, null);
        fields.meta = robotsHeader != null ? parseRobotInstructions(robotsHeader) : NONE;
        if (fields.mime.toLowerCase(Locale.ROOT).contains("html")) {
            if (fields.redir.equals(NONE)) {
                fields.redir = extractHTMLMetaRefresh(origUrl, m);
            }
            if (fields.meta.equals(NONE)) {
                fields.meta = extractHTMLRobots(m);
            }
        }
    }

    /**
     * Returns the robots flags of the first {@code <meta name="robots">} tag that
     * has a {@code content} attribute, or {@code "-"} if there is none.
     */
    private String extractHTMLRobots(MetaData m) {
        JSONArray metas = JSONUtils.extractArray(m, HTML_METAS_PATH);
        if (metas == null) {
            return NONE;
        }
        int count = metas.length();
        for (int i = 0; i < count; ++i) {
            JSONObject tag = metas.optJSONObject(i);
            if (tag == null) {
                continue;
            }
            String name = scanHeadersLC(tag, "name", null);
            if (name == null || !name.toLowerCase(Locale.ROOT).equals("robots")) {
                continue;
            }
            String content = scanHeadersLC(tag, "content", null);
            if (content != null) {
                return parseRobotInstructions(content);
            }
        }
        return NONE;
    }

    /**
     * Returns the target URL of the first {@code <meta http-equiv="refresh">} tag
     * whose {@code content} names a URL, or {@code "-"} if there is none. Refresh
     * tags without a URL are skipped so that a later tag can still match.
     */
    private String extractHTMLMetaRefresh(String origUrl, MetaData m) {
        JSONArray metas = JSONUtils.extractArray(m, HTML_METAS_PATH);
        if (metas == null) {
            return NONE;
        }
        int count = metas.length();
        for (int i = 0; i < count; ++i) {
            JSONObject tag = metas.optJSONObject(i);
            if (tag == null) {
                continue;
            }
            String equiv = scanHeadersLC(tag, "http-equiv", null);
            if (equiv == null || !equiv.toLowerCase(Locale.ROOT).equals("refresh")) {
                continue;
            }
            String content = scanHeadersLC(tag, "content", null);
            if (content == null) {
                continue;
            }
            String target = parseMetaRefreshContent(content);
            if (target != null) {
                return target;
            }
        }
        return NONE;
    }

    /**
     * Resolves {@code spec} against {@code context} and returns the result in
     * ASCII form. This is best-effort: if either URL cannot be parsed or
     * converted, {@code spec} is returned unchanged.
     */
    private String resolve(String context, String spec) {
        try {
            URL base = new URL(context);
            URL resolved = new URL(base, spec);
            return resolved.toURI().toASCIIString();
        } catch (URISyntaxException ignored) {
            // Not expressible as a URI; fall through and keep the raw value.
        } catch (MalformedURLException ignored) {
            // Context or spec is not a parsable URL; keep the raw value.
        } catch (NullPointerException ignored) {
            // Kept from the original as a defensive guard; keep the raw value.
        }
        return spec;
    }

    /**
     * Looks up a value in {@code o} by key, ignoring case and surrounding
     * whitespace in the key, and returns it trimmed.
     *
     * @param o          object to search; may be {@code null}
     * @param match      key to look for
     * @param defaultVal returned when {@code o} is null or empty, the key is
     *                   absent, or the value cannot be read as a string
     */
    private String scanHeadersLC(JSONObject o, String match, String defaultVal) {
        // The emptiness check also protects the loop: getNames is not iterated for an empty object.
        if (o == null || o.length() == 0) {
            return defaultVal;
        }
        // Locale.ROOT keeps the comparison independent of the JVM's default locale.
        String wanted = match.toLowerCase(Locale.ROOT).trim();
        for (String key : JSONObject.getNames((JSONObject) o)) {
            if (!wanted.equals(key.toLowerCase(Locale.ROOT).trim())) {
                continue;
            }
            try {
                return o.getString(key).trim();
            } catch (JSONException e) {
                LOG.log(Level.WARNING, "Value of \"" + key + "\" could not be read as a string; using default", e);
                return defaultVal;
            }
        }
        return defaultVal;
    }

    /** Returns the container filename, or {@code "-"} if absent. */
    private String getContainerFilename(MetaData m) {
        return unwrapFirst(this.filenameSpec.extract(m), NONE);
    }

    /** Returns the record's offset within its container, or {@code "-"} if absent. */
    private String getContainerOffset(MetaData m) {
        return unwrapFirst(this.offsetSpec.extract(m), NONE);
    }

    /** Returns the gzip deflate length, or {@code "-"} if absent. */
    private String getGZLength(MetaData m) {
        return unwrapFirst(this.gzDeflateLengthSpec.extract(m), NONE);
    }

    /** Returns the envelope format, or {@code "-"} if absent. */
    private String getEnvelopeFormat(MetaData m) {
        return unwrapFirst(this.formatSpec.extract(m), NONE);
    }

    /** Returns the WARC target URI, or {@code "-"} if absent. */
    private String getWARCURL(MetaData m) {
        return unwrapFirst(this.warcURL.extract(m), NONE);
    }

    /** Returns the WARC date passed through {@link #normalizeWARCDate(String)}. */
    private String getWARCDate(MetaData m) {
        return normalizeWARCDate(unwrapFirst(this.warcDate.extract(m), NONE));
    }

    /** Returns the WARC record type, or {@code "-"} if absent. */
    private String getWARCType(MetaData m) {
        return unwrapFirst(this.warcType.extract(m), NONE);
    }

    /** Returns the WARC payload digest without its {@code sha1:} prefix, or {@code "-"}. */
    private String getWARCPayloadDigest(MetaData m) {
        return normalizeSHA1(unwrapFirst(this.warcPayloadDigest.extract(m), NONE));
    }

    /** Returns the HTTP response status, or {@code "-"} if absent. */
    private String getHTTPStatus(MetaData m) {
        return unwrapFirst(this.httpResponseCode.extract(m), NONE);
    }

    /** Returns the WARC record's Content-Type header, or {@code "-"} if absent. */
    private String getWARCContentType(MetaData m) {
        return unwrapFirst(this.warcContentType.extract(m), NONE);
    }

    /** Returns the envelope block digest without its {@code sha1:} prefix, or {@code "-"}. */
    private String getEnvelopeBlockDigest(MetaData m) {
        return normalizeSHA1(unwrapFirst(this.envBlockDigest.extract(m), NONE));
    }

    /** Returns the HTTP entity digest without its {@code sha1:} prefix, or {@code "-"}. */
    private String getHTTPEntityDigest(MetaData m) {
        return normalizeSHA1(unwrapFirst(this.httpEntityDigest.extract(m), NONE));
    }

    /** Returns the ARC target URI, or {@code "-"} if absent. */
    private String getARCURL(MetaData m) {
        return unwrapFirst(this.arcURL.extract(m), NONE);
    }

    /** Returns the ARC date as stored, or {@code "-"} if absent. */
    private String getARCDate(MetaData m) {
        return unwrapFirst(this.arcDate.extract(m), NONE);
    }

    /** Returns the ARC content type passed through {@link #normalizeHTTPMime(String)}. */
    private String getARCContentType(MetaData m) {
        return normalizeHTTPMime(unwrapFirst(this.arcContentType.extract(m), NONE));
    }

    /**
     * Strips a leading {@code "sha1:"} prefix from a digest.
     *
     * @param sha1 digest text; may be {@code null}
     * @return the digest without the prefix, the input unchanged if it has no
     *         such prefix, or {@code "-"} for {@code null}
     */
    public String normalizeSHA1(String sha1) {
        if (sha1 == null) {
            return NONE;
        }
        final String prefix = "sha1:";
        return sha1.startsWith(prefix) ? sha1.substring(prefix.length()) : sha1;
    }

    /**
     * Compacts a 20-character timestamp laid out like {@code 2011-02-25T18:32:14Z}
     * into its 14 digit characters ({@code 20110225183214}) by dropping the
     * characters at the separator positions.
     *
     * @param date timestamp text; may be {@code null}
     * @return the compacted timestamp, the input unchanged if it is not exactly
     *         20 characters long, or {@code "-"} for {@code null}
     */
    public String normalizeWARCDate(String date) {
        if (date == null) {
            return NONE;
        }
        if (date.length() != 20) {
            return date;
        }
        // Keep year, month, day, hour, minute, second; skip positions 4, 7, 10, 13, 16 and 19.
        return new StringBuilder(14)
                .append(date, 0, 4)
                .append(date, 5, 7)
                .append(date, 8, 10)
                .append(date, 11, 13)
                .append(date, 14, 16)
                .append(date, 17, 19)
                .toString();
    }

    /** Replaces every space with {@code %20} so the value cannot split a CDX column. */
    private String escapeSpaces(String input) {
        return input.replace(" ", "%20");
    }

    /**
     * Reduces a Content-Type value to its bare media type: everything from the
     * first {@code ';'} on is dropped (when the semicolon is not the first
     * character), the rest is trimmed and remaining spaces become {@code %20}.
     *
     * @param input Content-Type text; may be {@code null}
     * @return the normalized media type, or {@code null} for {@code null}
     */
    public String normalizeHTTPMime(String input) {
        if (input == null) {
            return null;
        }
        int semiIdx = input.indexOf(";");
        String mediaType = semiIdx > 0 ? input.substring(0, semiIdx) : input;
        return escapeSpaces(mediaType.trim());
    }

    /**
     * Returns the first string of the first list in {@code l}, or
     * {@code defaultValue} if any level is null or empty.
     */
    private String unwrapFirst(List<List<String>> l, String defaultValue) {
        if (l == null || l.isEmpty()) {
            return defaultValue;
        }
        List<String> first = l.get(0);
        if (first == null || first.isEmpty()) {
            return defaultValue;
        }
        String v = first.get(0);
        return (v != null && v.length() > 0) ? v : defaultValue;
    }

    /**
     * Converts robots directive text into CDX flags: {@code F} for nofollow,
     * {@code A} for noarchive, {@code I} for noindex, appended in that order.
     * The directive {@code none} yields {@code "AIF"}. Hyphens and case in the
     * input are ignored.
     *
     * @return the flags, or {@code "-"} if the input is null or names none of
     *         the recognized directives
     */
    private String parseRobotInstructions(String input) {
        if (input == null) {
            return NONE;
        }
        // Locale.ROOT: a locale-sensitive upper-casing (e.g. Turkish) would turn
        // "noindex" into "NOİNDEX" and the directive would be missed.
        String up = input.replace("-", "").toUpperCase(Locale.ROOT);
        StringBuilder flags = new StringBuilder(3);
        if (up.contains(NO_FOLLOW_MATCH)) {
            flags.append("F");
        }
        if (up.contains(NO_ARCHIVE_MATCH)) {
            flags.append("A");
        }
        if (up.contains(NO_INDEX_MATCH)) {
            flags.append("I");
        }
        if (up.contains(NO_NOTHIN_MATCH)) {
            flags.setLength(0);
            flags.append("AIF");
        }
        return flags.length() == 0 ? NONE : flags.toString();
    }

    /**
     * Extracts the URL from a meta-refresh {@code content} value.
     *
     * @return the URL text, or {@code null} if {@code content} does not have the
     *         form {@code "<seconds>; url=<target>"}
     */
    private String parseMetaRefreshContent(String content) {
        Matcher matcher = REFRESH_URL_PATTERN.matcher(content);
        return matcher.matches() ? matcher.group(1) : null;
    }
}

