

PreconditionEnforcer.java

Page 1 of 2
/* Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * SimplePolitenessEnforcer.java
 * Created on May 22, 2003
 *
 * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/prefetch/PreconditionEnforcer.java,v 1.25 2006/08/30 21:24:45 stack-sf Exp $
 */
package org.archive.crawler.prefetch;

import java.util.Iterator;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlHost;
import org.archive.crawler.datamodel.CrawlServer;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.CredentialStore;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.datamodel.credential.Credential;
import org.archive.crawler.datamodel.credential.CredentialAvatar;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.Type;
import org.archive.net.UURI;

/**
 * Ensures the preconditions for a fetch -- such as DNS lookup
 * or acquiring and respecting a robots.txt policy -- are
 * satisfied before a URI is passed to subsequent stages.
 *
 * @author gojomo
 */
public class PreconditionEnforcer
        extends Processor
        implements CoreAttributeConstants, FetchStatusCodes {

    private static final Logger logger =
        Logger.getLogger(PreconditionEnforcer.class.getName());

    private final static Integer DEFAULT_IP_VALIDITY_DURATION =
        new Integer(60*60*6); // six hours

    private final static Integer DEFAULT_ROBOTS_VALIDITY_DURATION =
        new Integer(60*60*24); // one day

    /** seconds to keep IP information for */
    public final static String ATTR_IP_VALIDITY_DURATION
        = "ip-validity-duration-seconds";

    /** seconds to cache robots info */
    public final static String ATTR_ROBOTS_VALIDITY_DURATION
        = "robot-validity-duration-seconds";

    /** whether to calculate robots exclusion without applying */
    public final static Boolean DEFAULT_CALCULATE_ROBOTS_ONLY = Boolean.FALSE;
    public final static String ATTR_CALCULATE_ROBOTS_ONLY
        = "calculate-robots-only";

    public PreconditionEnforcer(String name) {
        super(name, "Precondition enforcer");

        Type e;

        e = addElementToDefinition(new SimpleType(ATTR_IP_VALIDITY_DURATION,
                "The minimum interval for which a dns-record will be considered " +
                "valid (in seconds). " +
                "If the record's DNS TTL is larger, that will be used instead.",
                DEFAULT_IP_VALIDITY_DURATION));
        e.setExpertSetting(true);

        e = addElementToDefinition(new SimpleType(ATTR_ROBOTS_VALIDITY_DURATION,
                "The time in seconds that fetched robots.txt information is " +
                "considered to be valid. " +
                "If the value is set to '0', then the robots.txt information" +
                " will never expire.",
                DEFAULT_ROBOTS_VALIDITY_DURATION));
        e.setExpertSetting(true);

        e = addElementToDefinition(new SimpleType(ATTR_CALCULATE_ROBOTS_ONLY,
                "Whether to only calculate the robots status of an URI, " +
                "without actually applying any exclusions found. If true, " +
                "excluded URIs will only be annotated in the crawl.log, but " +
                "still fetched. Default is false.",
                DEFAULT_CALCULATE_ROBOTS_ONLY));
        e.setExpertSetting(true);
    }

    protected void innerProcess(CrawlURI curi) {
        if (considerDnsPreconditions(curi)) {
            return;
        }

        // make sure we only process schemes we understand (i.e. not dns)
        String scheme = curi.getUURI().getScheme().toLowerCase();
        if (! (scheme.equals("http") || scheme.equals("https"))) {
            logger.fine("PolitenessEnforcer doesn't understand uri's of type " +
                scheme + " (ignoring)");
            return;
        }

        if (considerRobotsPreconditions(curi)) {
            return;
        }

        if (!curi.isPrerequisite() && credentialPrecondition(curi)) {
            return;
        }

        // OK, it's allowed.
        // For all curis that will in fact be fetched, set appropriate delays.
        // TODO: SOMEDAY: allow per-host, per-protocol, etc. factors
        // curi.setDelayFactor(getDelayFactorFor(curi));
        // curi.setMinimumDelay(getMinimumDelayFor(curi));

        return;
    }

    /**
     * Consider the robots precondition.
     *
     * @param curi CrawlURI we're checking for any required preconditions.
     * @return True, if this <code>curi</code> has a precondition or processing
     *         should be terminated for some other reason.  False if
     *         we can proceed to process this url.
     */
    private boolean considerRobotsPreconditions(CrawlURI curi) {
//        // treat /robots.txt fetches specially
//        UURI uuri = curi.getUURI();
//        try {
//            if (uuri != null && uuri.getPath() != null &&
//                    curi.getUURI().getPath().equals("/robots.txt")) {
//                // allow processing to continue
//                curi.setPrerequisite(true);
//                return false;
//            }
//        }
//        catch (URIException e) {
//            logger.severe("Failed get of path for " + curi);
//        }
//        // require /robots.txt if not present
//        if (isRobotsExpired(curi)) {
//            // Need to get robots
//            if (logger.isLoggable(Level.FINE)) {
//                logger.fine( "No valid robots for " +
//                    getController().getServerCache().getServerFor(curi) +
//                    "; deferring " + curi);
//            }
//
//            // Robots expired - should be refetched even though its already
//            // crawled.
//            try {
//                String prereq = curi.getUURI().resolve("/robots.txt").toString();
//                curi.markPrerequisite(prereq,
//                    getController().getPostprocessorChain());
//            }
//            catch (URIException e1) {
//                logger.severe("Failed resolve using " + curi);
//                throw new RuntimeException(e1); // shouldn't ever happen
//            }
//            return true;
//        }
//        // test against robots.txt if available
//        CrawlServer cs = getController().getServerCache().getServerFor(curi);
//        if(cs.isValidRobots()){
//            String ua = getController().getOrder().getUserAgent(curi);
//            if(cs.getRobots().disallows(curi, ua)) {
//                if(((Boolean)getUncheckedAttribute(curi,ATTR_CALCULATE_ROBOTS_ONLY)).booleanValue() == true) {
//                    // annotate URI as excluded, but continue to process normally
//                    curi.addAnnotation("robotExcluded");
//                    return false;
//                }
//                // mark as precluded; in FetchHTTP, this will
//                // prevent fetching and cause a skip to the end
//                // of processing (unless an intervening processor
//                // overrules)
//                curi.setFetchStatus(S_ROBOTS_PRECLUDED);
//                curi.putString("error","robots.txt exclusion");
//                logger.fine("robots.txt precluded " + curi);
//                return true;
//            }
//            return false;
//        }
//        // No valid robots found => Attempt to get robots.txt failed
//        curi.skipToProcessorChain(getController().getPostprocessorChain());
//        curi.setFetchStatus(S_ROBOTS_PREREQUISITE_FAILURE);
//        curi.putString("error","robots.txt prerequisite failed");
//        if (logger.isLoggable(Level.FINE)) {
//            logger.fine("robots.txt prerequisite failed " + curi);
//        }
//        return true;
        return false;
    }

    /**
     * @param curi CrawlURI whose dns prerequisite we're to check.
     * @return true if no further processing in this module should occur
     */
    private boolean considerDnsPreconditions(CrawlURI curi) {
        if(curi.getUURI().getScheme().equals("dns")){
            // DNS URIs never have a DNS precondition
            curi.setPrerequisite(true);
            return false;
        }

        CrawlServer cs = getController().getServerCache().getServerFor(curi);
        if(cs == null) {
            curi.setFetchStatus(S_UNFETCHABLE_URI);
            curi.skipToProcessorChain(getController().getPostprocessorChain());
            return true;
        }

        // If we've done a dns lookup and it didn't resolve a host,
        // cancel further fetch-processing of this URI, because
        // the domain is unresolvable
        CrawlHost ch = getController().getServerCache().getHostFor(curi);
        if (ch == null || ch.hasBeenLookedUp() && ch.getIP() == null) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine( "no dns for " + ch +
                    " cancelling processing for CrawlURI " + curi.toString());
            }
            curi.setFetchStatus(S_DOMAIN_PREREQUISITE_FAILURE);
            curi.skipToProcessorChain(getController().getPostprocessorChain());
            return true;
        }

        // If we haven't done a dns lookup and this isn't a dns uri,
        // shoot that off and defer further processing
        if (isIpExpired(curi) && !curi.getUURI().getScheme().equals("dns")) {
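
(The listing above is page 1 of 2 and breaks off inside considerDnsPreconditions.)

As a side note on the "robot-validity-duration-seconds" setting declared in the constructor: per its description, a value of 0 means the cached robots.txt information never expires. The following standalone sketch (not Heritrix code; the method and parameter names robotsFetchedAtMillis and validitySeconds are made up for illustration, whereas the real check lives behind isRobotsExpired() on the server cache) shows that expiry rule in isolation:

// Standalone illustration of a robots.txt expiry check driven by a
// validity duration in seconds; names here are hypothetical.
public final class RobotsExpirySketch {

    /** Returns true when cached robots.txt information should be refetched. */
    static boolean isExpired(long robotsFetchedAtMillis, long validitySeconds,
            long nowMillis) {
        if (validitySeconds == 0) {
            // Per the setting description: '0' means the data never expires.
            return false;
        }
        return nowMillis - robotsFetchedAtMillis > validitySeconds * 1000L;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        long fetched25HoursAgo = now - 25L * 60 * 60 * 1000;
        // true: older than the one-day default validity duration
        System.out.println(isExpired(fetched25HoursAgo, 60 * 60 * 24, now));
        // false: a validity duration of 0 disables expiry entirely
        System.out.println(isExpired(fetched25HoursAgo, 0, now));
    }
}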
