crawler.java

A Web crawler (robot)
JAVA
Page 1 of 3
/*
 * WebSPHINX web crawling toolkit
 * Copyright (C) 1998,1999 Carnegie Mellon University
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library
 * General Public License as published by the Free Software
 * Foundation, version 2.
 *
 * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/
 */

package websphinx;

import websphinx.util.PriorityQueue;
import websphinx.util.Timer;
import java.util.Vector;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.net.URL;
import java.net.MalformedURLException;
import java.io.IOException;
//#ifdef JDK1.1
import java.io.Serializable;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
//#endif JDK1.1

/**
 * Web crawler.
 * <P>
 * To write a crawler, extend this class and override
 * shouldVisit() and visit() to create your own crawler.
 * <P>
 * To use a crawler:
 * <OL>
 * <LI>Initialize the crawler by calling
 * setRoot() (or one of its variants) and setting other
 * crawl parameters.
 * <LI>Register any classifiers you need with addClassifier().
 * <LI>Connect event listeners to monitor the crawler,
 *     such as websphinx.EventLog, websphinx.workbench.WebGraph,
 *     or websphinx.workbench.Statistics.
 * <LI>Call run() to start the crawler.
 * </OL>
 * A running crawler consists of a priority queue of
 * Links waiting to be visited and a set of threads
 * retrieving pages in parallel.  When a page is downloaded,
 * it is processed as follows:
 * <OL>
 * <LI><B>classify()</B>: The page is passed to the classify() method of
 * every registered classifier, in increasing order of
 * their priority values.  Classifiers typically attach
 * informative labels to the page and its links, such as "homepage"
 * or "root page".
 * <LI><B>visit()</B>: The page is passed to the crawler's
 * visit() method for user-defined processing.
 * <LI><B>expand()</B>: The page is passed to the crawler's
 * expand() method to be expanded.  The default implementation
 * tests every unvisited hyperlink on the page with shouldVisit(),
 * and puts each link approved by shouldVisit() into the crawling queue.
 * </OL>
 * By default, when expanding the links of a page, the crawler
 * only considers hyperlinks (not applets or inline images, for instance) that
 * point to Web pages (not mailto: links, for instance).  If you want
 * shouldVisit() to test every link on the page, use setLinkType(Crawler.ALL_LINKS).
 */
public class Crawler implements Runnable
//#ifdef JDK1.1
, Serializable
//#endif JDK1.1
{

    //#ifdef JDK1.1
    private static final long serialVersionUID = -3757789861952010450L;
    //#endif JDK1.1

    /**
     * Specify WEB as the crawl domain to allow the crawler
     * to visit any page on the World Wide Web.
     */
    public static final String[] WEB = null;

    /**
     * Specify SERVER as the crawl domain to limit the crawler
     * to visit only pages on the same Web server (hostname
     * and port number) as the root link from which it started.
     */
    public static final String[] SERVER = {"local"};

    /**
     * Specify SUBTREE as the crawl domain to limit the crawler
     * to visit only pages which are descendants of the root link
     * from which it started.
     */
    public static final String[] SUBTREE = {"sibling", "descendent"};

    /**
     * Specify HYPERLINKS as the link type to allow the crawler
     * to visit only hyperlinks (A, AREA, and FRAME tags which
     * point to http:, ftp:, file:, or gopher: URLs).
     */
    public static final String[] HYPERLINKS = {"hyperlink"};

    /**
     * Specify HYPERLINKS_AND_IMAGES as the link type to allow the crawler
     * to visit only hyperlinks and inline images.
     */
    public static final String[] HYPERLINKS_AND_IMAGES = {"hyperlink", "image"};

    /**
     * Specify ALL_LINKS as the link type to allow the crawler
     * to visit any kind of link.
     */
    public static final String[] ALL_LINKS = null;

    // Crawler parameters
    private String name = getClass().getName();   // crawler's name
    private transient Link[] roots = null;
    private String[] rootHrefs = null;   // exists only when serializing crawler
    private String[] domain = WEB;
    private boolean synchronous = false;
    private boolean depthFirst = true;
    private String[] type = HYPERLINKS;
    private boolean ignoreVisitedLinks = true;
    private int maxDepth = 5;
    private DownloadParameters dp = new DownloadParameters ()
                                  .changeUserAgent (name);
    private Vector classifiers = new Vector ();
    private LinkPredicate linkPredicate;
    private PagePredicate pagePredicate;
    private Action action;

    // Transient state
    private transient Link[] crawledRoots = null;

    private transient int state = CrawlEvent.CLEARED;

    private transient Worm[] worms;
        // background threads

    private transient PriorityQueue fetchQueue;
        // links waiting to be downloaded

    private transient PriorityQueue crawlQueue;
        // all links that have been expanded but not
        // processed (used only if crawler is in synchronous mode)

    private transient int numLinksTested;
        // number of links tested by shouldVisit()

    private transient int numPagesVisited;
        // number of pages passed to visit()

    private transient int numPagesLeft;
        // all links that have been expanded but not processed
        // == crawlQueue.size ()

    // FIX: convert to immutable linked lists
    private transient Vector crawlListeners;
        // list of CrawlListeners

    private transient Vector linkListeners;
        // list of LinkListeners

    private transient Hashtable visitedPages;
        // visited pages (a set of URLs)

    private transient RobotExclusion robotExclusion;
        // robot exclusion cache

    /**
     * Make a new Crawler.
     */
    public Crawler () {
        addClassifier (new StandardClassifier());
        init ();
    }

    /*
     * Initialize the transient fields of the crawler.
     */
    private void init () {
        state = CrawlEvent.CLEARED;

        numLinksTested = 0;
        numPagesVisited = 0;
        numPagesLeft = 0;

        worms = null;
        crawlQueue = new PriorityQueue();
        fetchQueue = new PriorityQueue();

        crawlListeners = new Vector ();
        linkListeners = new Vector ();

        visitedPages = new Hashtable ();

        robotExclusion = new RobotExclusion (getName ());
    }

    /*
     * Write a Crawler to an output stream.
     */
    //#ifdef JDK1.1
    private void writeObject (ObjectOutputStream out)
            throws IOException {
        if (roots != null) {
            rootHrefs = new String[roots.length];
            for (int i=0; i<roots.length; ++i)
                rootHrefs[i] = roots[i].getURL().toString();
        }
        else
            rootHrefs = null;

        out.defaultWriteObject ();

        rootHrefs = null;
    }
    //#endif JDK1.1

    /*
     * Read a Crawler from an input stream.
     */
    //#ifdef JDK1.1
    private void readObject (ObjectInputStream in)
            throws IOException, ClassNotFoundException {
        in.defaultReadObject ();

        if (rootHrefs != null) {
            roots = new Link [rootHrefs.length];
            for (int i=0; i<rootHrefs.length; ++i)
                roots[i] = new Link (rootHrefs[i]);
        }
        else
            roots = null;

        domain = useStandard (WEB, domain);
        domain = useStandard (SERVER, domain);
        domain = useStandard (SUBTREE, domain);

        type = useStandard (HYPERLINKS, type);
        type = useStandard (HYPERLINKS_AND_IMAGES, type);
        type = useStandard (ALL_LINKS, type);

        init ();

        if (linkPredicate != null)
            linkPredicate.connected (this);
        if (pagePredicate != null)
            pagePredicate.connected (this);
        if (action != null)
            action.connected (this);
    }

    /*
     * If a deserialized array has the same contents as one of the shared
     * standard constants (WEB, SERVER, SUBTREE, HYPERLINKS, ...), replace
     * it with the constant itself so that reference comparisons against
     * the constants keep working after deserialization.
     */
    private static String[] useStandard (String[] standard, String[] s) {
        if (s == null || standard == null || standard == s)
            return s;
        if (s.length != standard.length)
            return s;
        for (int i=0; i<s.length; ++i)
            if (!s[i].equals (standard[i]))
                return s;
        return standard;
    }
    //#endif JDK1.1

    /**
     * Start crawling.  Returns either when the crawl is done, or
     * when pause() or stop() is called.  Because this method implements the
     * java.lang.Runnable interface, a crawler can be run in a
     * background thread.
     */
    public void run () {
        crawledRoots = roots;

        if (state == CrawlEvent.STOPPED)
            clear ();

        if (state == CrawlEvent.CLEARED && crawledRoots != null) {
            // give each root a default priority based on its position in the array
            float priority = 0;
            float increment = 1.0f/crawledRoots.length;
            for (int i=0; i<crawledRoots.length; ++i) {
                crawledRoots[i].setPriority (priority);
                priority += increment;
            }
            submit (crawledRoots);
        }

        state = CrawlEvent.STARTED;
        sendCrawlEvent (state);

        synchronized (crawlQueue) {

            Timer timer = new CrawlTimer (this);
            int timeout = dp.getCrawlTimeout();
            if (timeout > 0)
                timer.set (timeout*1000, false);

            int nWorms = Math.max (dp.getMaxThreads (), 1);
            worms = new Worm[nWorms];
            for (int i=0; i<nWorms; ++i) {
                worms[i] = new Worm (this, i);
                worms[i].start ();
            }

            try {
                while (state == CrawlEvent.STARTED) {
                    if (numPagesLeft == 0) {
                        // ran out of links to crawl
                        state = CrawlEvent.STOPPED;
                        sendCrawlEvent (state);
                    }
                    else if (synchronous) {
                        // Synchronous mode.
                        // Main thread calls process() on each link
                        // in crawlQueue, in priority order.
                        Link link = (Link)crawlQueue.getMin ();
                        if (link.getStatus () == LinkEvent.DOWNLOADED)
                            process (link);
                        else
                            crawlQueue.wait ();
                    }
                    else
                        // Asynchronous crawling.
                        // Main thread does nothing but wait, while
                        // background threads call process().
                        crawlQueue.wait ();
                }
            } catch (InterruptedException e) {}

            timer.cancel ();

            for (int i=0; i<worms.length; ++i)
                worms[i].die ();

            if (state == CrawlEvent.PAUSED) {
                // put partly-processed links back in fetchQueue
                synchronized (fetchQueue) {
                    for (int i=0; i<worms.length; ++i)
                        if (worms[i].link != null)
                            fetchQueue.put (worms[i].link);
                }
            }

            worms = null;
        }
    }

    /**
     * Initialize the crawler for a fresh crawl.  Clears the crawling queue
     * and sets all crawling statistics to 0.  Stops the crawler
     * if it is currently running.
     */
    public void clear () {
        stop ();
        numPagesVisited = 0;
        numLinksTested = 0;
        clearVisited ();
        if (crawledRoots != null)
            for (int i=0; i < crawledRoots.length; ++i)
                crawledRoots[i].disconnect ();
        crawledRoots = null;
        state = CrawlEvent.CLEARED;
        sendCrawlEvent (state);
    }

    /**
     * Pause the crawl in progress.  If the crawler is running, then
     * it finishes processing the current page, then returns.  The queues remain as-is,
     * so calling run() again will resume the crawl exactly where it left off.
     * pause() can be called from any thread.
     */
    public void pause () {
        if (state == CrawlEvent.STARTED) {
            synchronized (crawlQueue) {
                state = CrawlEvent.PAUSED;
                crawlQueue.notify ();
            }
            sendCrawlEvent (state);
        }
    }

    /**
     * Stop the crawl in progress.  If the crawler is running, then
     * it finishes processing the current page, then returns.
     * Empties the crawling queue.
     */
    public void stop () {
        if (state == CrawlEvent.STARTED || state == CrawlEvent.PAUSED) {
            synchronized (crawlQueue) {
                synchronized (fetchQueue) {
                    state = CrawlEvent.STOPPED;
                    fetchQueue.clear ();
                    crawlQueue.clear ();
                    numPagesLeft = 0;
                    crawlQueue.notify ();
                }
            }
            sendCrawlEvent (state);
        }
    }

    /*
     * Timeout the crawl in progress.  Used internally by
     * the CrawlTimer.
     */
    void timedOut () {
        if (state == CrawlEvent.STARTED) {
            synchronized (crawlQueue) {
                synchronized (fetchQueue) {
                    state = CrawlEvent.TIMED_OUT;
                    fetchQueue.clear ();
                    crawlQueue.clear ();
                    numPagesLeft = 0;
                    crawlQueue.notify ();
                }
            }
            sendCrawlEvent (state);
        }
    }

    /**
     * Get state of crawler.
     * @return one of CrawlEvent.STARTED, CrawlEvent.PAUSED,
     *         CrawlEvent.STOPPED, CrawlEvent.CLEARED, or CrawlEvent.TIMED_OUT.
     */
    public int getState () {
        return state;
    }
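
The listing above covers only the first of the file's three pages, so the class body continues beyond getState() (methods called above, such as submit(), process(), sendCrawlEvent(), and clearVisited(), are defined later in the file). For orientation, here is a minimal usage sketch following the recipe in the class javadoc: subclass Crawler, override shouldVisit() and visit(), set a root, and call run(). It is a sketch under assumptions: the parameter types Link and Page, the setRoot(Link) setter, and Page.getTitle() are taken from the WebSPHINX documentation rather than this page of the listing; only the Link(String) constructor, Link.getURL(), and run() actually appear above.

import websphinx.Crawler;
import websphinx.Link;
import websphinx.Page;

// Hypothetical subclass for illustration only: follows links under /docs/
// and prints the title of every page visited.
public class TitleCrawler extends Crawler {

    // Approve only links whose URL contains "/docs/" (an arbitrary example rule).
    public boolean shouldVisit (Link l) {
        return l.getURL ().toString ().indexOf ("/docs/") != -1;
    }

    // User-defined processing: print the title of each visited page.
    public void visit (Page p) {
        System.out.println (p.getTitle ());
    }

    public static void main (String[] args) throws Exception {
        TitleCrawler c = new TitleCrawler ();
        c.setRoot (new Link ("http://www.example.com/docs/"));  // placeholder root URL
        c.run ();   // returns when the crawl finishes, or when pause()/stop() is called
    }
}

Per the pause() javadoc, a paused crawler keeps its queues intact, so calling run() again resumes exactly where the crawl left off; stop() empties the queues instead.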
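
The writeObject()/readObject() pair in the listing also makes a configured crawler persistable with ordinary Java serialization: the root Links are flattened to URL strings before writing, and readObject() rebuilds the Links and calls init(), so the queues, listener lists, and visited-page set start empty after loading. A minimal sketch using only the standard java.io API (class and method names here are illustrative):

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

import websphinx.Crawler;

// Sketch of saving and restoring a crawler's configuration.  Transient crawl
// state (queues, statistics, visited set) is reinitialized by readObject(),
// so a loaded crawler starts a fresh crawl from its roots.
public class CrawlerStore {
    public static void save (Crawler c, String filename) throws Exception {
        ObjectOutputStream out = new ObjectOutputStream (new FileOutputStream (filename));
        out.writeObject (c);
        out.close ();
    }

    public static Crawler load (String filename) throws Exception {
        ObjectInputStream in = new ObjectInputStream (new FileInputStream (filename));
        Crawler c = (Crawler) in.readObject ();
        in.close ();
        return c;
    }
}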
