/*
 * Decompiled with CFR 0.152.
 */
package org.apache.cocoon.generation;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.cocoon.Constants;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.ResourceNotFoundException;
import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.generation.ServiceableGenerator;
import org.apache.cocoon.util.Tokenizer;
import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

public class LinkStatusGenerator
extends ServiceableGenerator
implements Recyclable,
Configurable {
    protected static final String URI = "http://apache.org/cocoon/linkstatus/2.0";
    protected static final String PREFIX = "linkstatus";
    protected static final String TOP_NODE_NAME = "linkstatus";
    protected static final String LINK_NODE_NAME = "link";
    protected static final String HREF_ATTR_NAME = "href";
    protected static final String REFERRER_ATTR_NAME = "referrer";
    protected static final String CONTENT_ATTR_NAME = "content";
    protected static final String STATUS_ATTR_NAME = "status";
    protected static final String MESSAGE_ATTR_NAME = "message";
    protected AttributesImpl attributes = new AttributesImpl();
    public static final String LINK_CONTENT_TYPE_CONFIG = "link-content-type";
    public final String LINK_CONTENT_TYPE_DEFAULT = "application/x-cocoon-links";
    public static final String LINK_VIEW_QUERY_CONFIG = "link-view-query";
    public static final String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links";
    public static final String EXCLUDE_CONFIG = "exclude";
    public static final String INCLUDE_CONFIG = "include";
    public static final String USER_AGENT_CONFIG = "user-agent";
    public static final String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
    public static final String ACCEPT_CONFIG = "accept";
    public static final String ACCEPT_DEFAULT = "*/*";
    private String linkViewQuery = "cocoon-view=links";
    private String linkContentType = "application/x-cocoon-links";
    private HashSet excludeCrawlingURL;
    private HashSet includeCrawlingURL;
    private String userAgent = USER_AGENT_DEFAULT;
    private String accept = "*/*";
    private HashSet crawled;
    private HashSet linksToProcess;

    public void configure(Configuration configuration) throws ConfigurationException {
        String value;
        String tokenized_pattern;
        Tokenizer t;
        String pattern;
        int i;
        Configuration[] children = configuration.getChildren(INCLUDE_CONFIG);
        if (children.length > 0) {
            this.includeCrawlingURL = new HashSet();
            for (i = 0; i < children.length; ++i) {
                pattern = children[i].getValue();
                try {
                    t = new Tokenizer(pattern, ", ");
                    while (t.hasMoreTokens()) {
                        tokenized_pattern = t.nextToken();
                        this.includeCrawlingURL.add(new RE(tokenized_pattern));
                    }
                    continue;
                }
                catch (RESyntaxException rese) {
                    this.getLogger().error("Cannot create including regular-expression for " + pattern, (Throwable)rese);
                }
            }
        }
        if ((children = configuration.getChildren(EXCLUDE_CONFIG)).length > 0) {
            this.excludeCrawlingURL = new HashSet();
            for (i = 0; i < children.length; ++i) {
                pattern = children[i].getValue();
                try {
                    t = new Tokenizer(pattern, ", ");
                    while (t.hasMoreTokens()) {
                        tokenized_pattern = t.nextToken();
                        this.excludeCrawlingURL.add(new RE(tokenized_pattern));
                    }
                    continue;
                }
                catch (RESyntaxException rese) {
                    this.getLogger().error("Cannot create excluding regular-expression for " + pattern, (Throwable)rese);
                }
            }
        } else {
            this.excludeCrawlingURL = new HashSet();
            this.setDefaultExcludeFromCrawling();
        }
        Configuration child = configuration.getChild(LINK_CONTENT_TYPE_CONFIG, false);
        if (child != null && (value = child.getValue()) != null && value.length() > 0) {
            this.linkContentType = value.trim();
        }
        if ((child = configuration.getChild(LINK_VIEW_QUERY_CONFIG, false)) != null && (value = child.getValue()) != null && value.length() > 0) {
            this.linkViewQuery = value.trim();
        }
        if ((child = configuration.getChild(USER_AGENT_CONFIG, false)) != null && (value = child.getValue()) != null && value.length() > 0) {
            this.userAgent = value;
        }
        if ((child = configuration.getChild(ACCEPT_CONFIG, false)) != null && (value = child.getValue()) != null && value.length() > 0) {
            this.accept = value;
        }
    }

    public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par) throws ProcessingException, SAXException, IOException {
        super.setup(resolver, objectModel, src, par);
        this.attributes = new AttributesImpl();
    }

    public void generate() throws SAXException, ProcessingException {
        try {
            this.crawled = new HashSet();
            this.linksToProcess = new HashSet();
            URL root = new URL(this.source);
            this.linksToProcess.add(new Link(root, ""));
            if (this.getLogger().isDebugEnabled()) {
                this.getLogger().debug("crawl URL " + root);
            }
            this.contentHandler.startDocument();
            this.contentHandler.startPrefixMapping("linkstatus", URI);
            this.attributes.clear();
            this.contentHandler.startElement(URI, "linkstatus", "linkstatus:linkstatus", this.attributes);
            while (this.linksToProcess.size() > 0) {
                List url_links;
                Iterator i = this.linksToProcess.iterator();
                if (!i.hasNext()) continue;
                Link link = (Link)i.next();
                URL url = link.getURL();
                this.linksToProcess.remove(link);
                String new_url_link = this.processURL(url, link.getReferrer());
                if (new_url_link == null || (url_links = this.getLinksFromConnection(new_url_link, url)) == null) continue;
                this.linksToProcess.addAll(url_links);
            }
            this.contentHandler.endElement(URI, "linkstatus", "linkstatus:linkstatus");
            this.contentHandler.endPrefixMapping("linkstatus");
            this.contentHandler.endDocument();
        }
        catch (IOException ioe) {
            this.getLogger().warn("Could not read source ", (Throwable)ioe);
            throw new ResourceNotFoundException("Could not read source ", ioe);
        }
    }

    private void setDefaultExcludeFromCrawling() {
        String[] EXCLUDE_FROM_CRAWLING_DEFAULT = new String[]{".*\\.gif(\\?.*)?$", ".*\\.png(\\?.*)?$", ".*\\.jpe?g(\\?.*)?$", ".*\\.js(\\?.*)?$", ".*\\.css(\\?.*)?$"};
        for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; ++i) {
            String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
            try {
                this.excludeCrawlingURL.add(new RE(pattern));
                continue;
            }
            catch (RESyntaxException rese) {
                this.getLogger().error("Cannot create excluding regular-expression for " + pattern, (Throwable)rese);
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * Loose catch block
     * Enabled force condition propagation
     * Lifted jumps to return sites
     */
    protected List getLinksFromConnection(String url_link_string, URL url_of_referrer) {
        String content_type;
        URLConnection conn;
        URL url_link;
        BufferedReader br;
        ArrayList<Link> url_links;
        block18: {
            url_links = null;
            br = null;
            url_link = new URL(url_link_string);
            conn = url_link.openConnection();
            content_type = conn.getContentType();
            if (content_type != null) break block18;
            this.getLogger().warn("No content type available for " + String.valueOf(url_link_string));
            ArrayList<Link> arrayList = url_links;
            Object var15_11 = null;
            if (br == null) return arrayList;
            try {
                br.close();
                return arrayList;
            }
            catch (IOException ignored) {
                // empty catch block
            }
            return arrayList;
        }
        if (this.getLogger().isDebugEnabled()) {
            this.getLogger().debug("Content-type: " + content_type);
        }
        if (content_type.equals(this.linkContentType)) {
            String line;
            url_links = new ArrayList<Link>();
            InputStream is = conn.getInputStream();
            br = new BufferedReader(new InputStreamReader(is));
            String referrer = url_of_referrer.toString();
            while ((line = br.readLine()) != null) {
                URL new_url = new URL(url_link, line);
                boolean add_url = true;
                if (add_url) {
                    add_url &= !url_links.contains(new_url);
                }
                if (add_url) {
                    add_url &= !this.crawled.contains(new_url.toString());
                }
                Link new_link = new Link(new_url, referrer);
                if (add_url) {
                    add_url &= !this.linksToProcess.contains(new_link);
                }
                if (add_url) {
                    add_url &= this.isIncludedURL(new_url.toString());
                }
                if (!add_url) continue;
                if (this.getLogger().isDebugEnabled()) {
                    this.getLogger().debug("Add URL: " + new_url.toString());
                }
                url_links.add(new_link);
            }
        }
        Object var15_12 = null;
        if (br == null) return url_links;
        try {
            br.close();
            br = null;
            return url_links;
        }
        catch (IOException ignored) {}
        return url_links;
        {
            catch (IOException ioe) {
                this.getLogger().warn("Problems get links of " + url_link_string, (Throwable)ioe);
                Object var15_13 = null;
                if (br == null) return url_links;
                try {
                    br.close();
                    br = null;
                    return url_links;
                }
                catch (IOException ignored) {}
                return url_links;
            }
        }
        catch (Throwable throwable) {
            Object var15_14 = null;
            if (br == null) throw throwable;
            try {
                br.close();
                br = null;
                throw throwable;
            }
            catch (IOException ignored) {
                // empty catch block
            }
            throw throwable;
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected String processURL(URL url, String referrer) throws SAXException {
        if (this.getLogger().isDebugEnabled()) {
            this.getLogger().debug("getLinks URL " + url);
        }
        String result = null;
        if (this.crawled.contains(url.toString())) {
            return null;
        }
        this.crawled.add(url.toString());
        this.attributes.clear();
        this.attributes.addAttribute("", HREF_ATTR_NAME, HREF_ATTR_NAME, "CDATA", url.toString());
        this.attributes.addAttribute("", REFERRER_ATTR_NAME, REFERRER_ATTR_NAME, "CDATA", referrer);
        HttpURLConnection h = null;
        try {
            URLConnection links_url_connection = url.openConnection();
            h = (HttpURLConnection)links_url_connection;
            String content_type = links_url_connection.getContentType();
            this.attributes.addAttribute("", CONTENT_ATTR_NAME, CONTENT_ATTR_NAME, "CDATA", content_type);
            this.attributes.addAttribute("", MESSAGE_ATTR_NAME, MESSAGE_ATTR_NAME, "CDATA", h.getResponseMessage());
            this.attributes.addAttribute("", STATUS_ATTR_NAME, STATUS_ATTR_NAME, "CDATA", String.valueOf(h.getResponseCode()));
        }
        catch (IOException ioe) {
            this.attributes.addAttribute("", MESSAGE_ATTR_NAME, MESSAGE_ATTR_NAME, "CDATA", ioe.getMessage());
        }
        finally {
            if (h != null) {
                h.disconnect();
            }
        }
        if (!this.isExcludedURL(url.toString()) && this.isIncludedURL(url.toString())) {
            result = url.toExternalForm() + (url.toExternalForm().indexOf("?") == -1 ? "?" : "&") + this.linkViewQuery;
        }
        this.contentHandler.startElement(URI, LINK_NODE_NAME, "linkstatus:link", this.attributes);
        this.contentHandler.endElement(URI, LINK_NODE_NAME, "linkstatus:link");
        return result;
    }

    private boolean isExcludedURL(String url) {
        if (this.excludeCrawlingURL == null) {
            if (this.getLogger().isDebugEnabled()) {
                this.getLogger().debug("exclude no URL " + url);
            }
            return false;
        }
        String s = url.toString();
        Iterator i = this.excludeCrawlingURL.iterator();
        while (i.hasNext()) {
            RE pattern = (RE)i.next();
            if (!pattern.match(s)) continue;
            if (this.getLogger().isDebugEnabled()) {
                this.getLogger().debug("exclude URL " + url);
            }
            return true;
        }
        if (this.getLogger().isDebugEnabled()) {
            this.getLogger().debug("exclude not URL " + url);
        }
        return false;
    }

    private boolean isIncludedURL(String url) {
        if (this.includeCrawlingURL == null) {
            if (this.getLogger().isDebugEnabled()) {
                this.getLogger().debug("include all URL " + url);
            }
            return true;
        }
        String s = url.toString();
        Iterator i = this.includeCrawlingURL.iterator();
        while (i.hasNext()) {
            RE pattern = (RE)i.next();
            if (!pattern.match(s)) continue;
            if (this.getLogger().isDebugEnabled()) {
                this.getLogger().debug("include URL " + url);
            }
            return true;
        }
        if (this.getLogger().isDebugEnabled()) {
            this.getLogger().debug("include not URL " + url);
        }
        return false;
    }

    public void recycle() {
        super.recycle();
        this.attributes = null;
    }

    private class Link {
        private URL url;
        private String referrer;

        public Link(URL url, String referrer) {
            this.url = url;
            this.referrer = referrer;
        }

        public URL getURL() {
            return this.url;
        }

        public String getReferrer() {
            return this.referrer;
        }

        public boolean equals(Link l) {
            return this.url.equals(l.getURL());
        }
    }
}

