亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? distributedwebdbwriter.java

?? 爬蟲數據的改進，并修正了一些bug
?? JAVA
?? 第 1 頁 / 共 5 頁
字號:

    /**
     * Helper for LinksByURLProcessor: walks a stream of Page records
     * (read in order from a MapFile.Reader) in lock-step with a series
     * of link-target URLs, and reports for each queried URL whether the
     * matching Page has outlinks.
     *
     * The reader is only ever moved forward, so queries are expected to
     * arrive in the same sort order as the page URLs.
     */
    private class TargetTester {
        MapFile.Reader pagedb;
        boolean hasPage = false;
        UTF8 pageURL = null;
        Page page = null;

        /**
         * Wraps the given page reader and loads its first entry.
         *
         * @param pagedb reader over the page records; closed by {@link #close()}
         * @throws IOException if reading the first entry fails
         */
        public TargetTester(MapFile.Reader pagedb) throws IOException {
            this.pagedb = pagedb;
            this.pageURL = new UTF8();
            this.page = new Page();
            this.hasPage = pagedb.next(pageURL, page);
        }

        /**
         * Advances through the page stream until the current page URL is
         * no longer smaller than the queried URL, then classifies the result.
         *
         * @param curURL the link-target URL to look up
         * @return HAS_OUTLINKS or NO_OUTLINKS when a page with exactly this
         *         URL is found (depending on its outlink count);
         *         LINK_INVALID when the stream has moved past the URL
         *         without finding it; NO_OUTLINKS when the stream is
         *         exhausted
         * @throws IOException if reading from the page stream fails
         */
        public int hasOutlinks(UTF8 curURL) throws IOException {
            int order = pageURL.compareTo(curURL);

            // Skip every page that sorts before the queried URL.
            while (hasPage && order < 0) {
                hasPage = pagedb.next(pageURL, page);
                if (!hasPage) {
                    break;
                }
                order = pageURL.compareTo(curURL);
            }

            // NOTE(review): once the page stream is exhausted, every query
            // yields NO_OUTLINKS rather than LINK_INVALID -- confirm this
            // is the intended treatment of targets past the last page.
            if (!hasPage) {
                return NO_OUTLINKS;
            }

            if (order > 0) {
                //
                // The page stream has passed the Link's target, so the
                // target page has been deleted, probably because we
                // repeatedly failed to fetch the URL.  The Link should
                // be deleted too.
                //
                return LINK_INVALID;
            }

            if (order == 0 && page.getNumOutlinks() > 0) {
                return HAS_OUTLINKS;
            }
            return NO_OUTLINKS;
        }

        /**
         * Closes the underlying page reader.
         *
         * @throws IOException if the reader fails to close
         */
        public void close() throws IOException {
            pagedb.close();
        }
    }

    /**
     * Closes down and merges changes to the URL-driven link
     * table.  This does nothing fancy, and propagates nothing
     * to a further stage.  There is no next stage!
     *
     * While merging, every link that is written is checked against the
     * page stream (via TargetTester) so that its "target has outlinks"
     * flag is current; links whose target page is missing are dropped
     * and a DEL_SINGLE_LINK instruction is recorded in futureEdits.
     */
    private class LinksByURLProcessor extends CloseProcessor {
        MapFile.Reader pageDb;
        EditSectionGroupWriter futureEdits;

        /**
         * @param db          reader over the existing URL-sorted link table
         * @param editWriter  writer for the edits handled by this processor
         * @param pageDb      reader over the page records, used to test link targets
         * @param futureEdits writer that receives the follow-up link instructions
         *                    generated during the merge
         */
        public LinksByURLProcessor(MapFile.Reader db, EditSectionGroupWriter editWriter, MapFile.Reader pageDb, EditSectionGroupWriter futureEdits) {
            super(LINKS_BY_URL, db, editWriter, new SequenceFile.Sorter(nfs, new LinkInstruction.UrlComparator(), NullWritable.class), new Link.UrlComparator(), Link.class, NullWritable.class, "LinksByURLPart");
            this.pageDb = pageDb;
            this.futureEdits = futureEdits;
        }

        /**
         * Runs the superclass close-down and then closes the page reader.
         * The page reader is closed in a finally clause so that it is
         * released even when the superclass close-down throws.
         *
         * @return the value returned by the superclass close-down
         * @throws IOException if the close-down or closing the page reader fails
         */
        public long closeDown(File workingDir, File outputDir) throws IOException {
            try {
                return super.closeDown(workingDir, outputDir);
            } finally {
                pageDb.close();
            }
        }

        /**
         * Merge the existing db with the edit-stream into a brand-new file.
         *
         * Both inputs are consumed front to back and compared with
         * Link.UrlComparator.  ADD_LINK edits insert or replace entries,
         * DEL_LINK edits remove the matching entry, and any other
         * instruction is skipped.
         *
         * @param db          existing link entries
         * @param sortedEdits sorted edit instructions
         * @param newDb       destination for the merged entries
         * @throws IOException if reading or writing any of the streams fails
         */
        void mergeEdits(MapFile.Reader db, SequenceFile.Reader sortedEdits, MapFile.Writer newDb) throws IOException {
            WritableComparator comparator = new Link.UrlComparator();

            // Create the keys and vals we'll use
            LinkInstruction editItem = new LinkInstruction();
            Link readerItem = new Link();

            // Read the first items from both streams
            boolean hasEntries = db.next(readerItem, NullWritable.get());
            boolean hasEdits = sortedEdits.next(editItem, NullWritable.get());
            TargetTester targetTester = new TargetTester(pageDb);

            // As long as we have both edits and entries to process,
            // we need to interleave them
            while (hasEntries && hasEdits) {
                int curInstruction = editItem.getInstruction();

                if (curInstruction == ADD_LINK) {
                    //  When we add a link, we may replace a previous
                    //    link with identical URL and MD5 values.  Our
                    //    comparator will test both
                    //
                    int comparison = comparator.compare(readerItem, editItem.getLink());

                    if (comparison < 0) {
                        // Write the readerKey, just passing it along.
                        // Don't process the edit yet.
                        passAlongWithOutlinkCheck(targetTester, readerItem, newDb);
                        hasEntries = db.next(readerItem, NullWritable.get());
                    } else if (comparison == 0) {
                        // Write the new item, "replacing" the old one.
                        // We move to the next edit instruction and move
                        //    past the replaced db entry.
                        writeWithFreshOutlinkFlag(targetTester, editItem.getLink(), newDb);
                        hasEntries = db.next(readerItem, NullWritable.get());
                        hasEdits = sortedEdits.next(editItem, NullWritable.get());
                    } else {
                        // Write the new item.  We stay at the current
                        // db entry.
                        writeWithFreshOutlinkFlag(targetTester, editItem.getLink(), newDb);
                        hasEdits = sortedEdits.next(editItem, NullWritable.get());
                    }
                } else if (curInstruction == DEL_LINK) {
                    // When we delete a link, we do it by MD5 and apply
                    //   it to the index first.  A single delete instruction
                    //   may remove many items in the db, during the earlier
                    //   processing.  However, unlike the index-processing stage,
                    //   here we can expect a new DEL instruction for every
                    //   item that we remove from the db.
                    //
                    int comparison = comparator.compare(readerItem, editItem.getLink());

                    if (comparison < 0) {
                        // Write readerKey, just passing it along.  Don't
                        //   process the edit yet.
                        //
                        // NOTE(review): unlike the ADD_LINK case above, this
                        // path records an ADD_LINK in futureEdits even when
                        // the outlink flag did not change.  Kept as-is;
                        // confirm whether that is deliberate.
                        writeWithFreshOutlinkFlag(targetTester, readerItem, newDb);
                        hasEntries = db.next(readerItem, NullWritable.get());
                    } else if (comparison == 0) {
                        // "Delete" the item by passing by the readerKey.
                        // We want a new entry, as well as the next instruction
                        // to process.
                        hasEntries = db.next(readerItem, NullWritable.get());
                        hasEdits = sortedEdits.next(editItem, NullWritable.get());
                    } else {
                        // Ignore, move on to next instruction
                        hasEdits = sortedEdits.next(editItem, NullWritable.get());
                    }
                } else {
                    // Unrecognized instruction: skip it, as the edits-only
                    // loop below does.  Without this, neither stream would
                    // advance and the loop would never terminate.
                    hasEdits = sortedEdits.next(editItem, NullWritable.get());
                }
            }

            // Now we have only edits.  No more preexisting items!
            while (! hasEntries && hasEdits) {
                if (editItem.getInstruction() == ADD_LINK) {
                    // Add the item from the edit list, making sure the
                    // outlinks flag is set properly.
                    writeWithFreshOutlinkFlag(targetTester, editItem.getLink(), newDb);
                }
                // DEL_LINK (and anything else) has nothing to act on here.

                // Move on to next edit
                hasEdits = sortedEdits.next(editItem, NullWritable.get());
            }

            // Now we have only preexisting items.  Just copy them
            // to the new file, in order, making sure the 'outlinks'
            // flag is set properly.
            while (hasEntries && ! hasEdits) {
                passAlongWithOutlinkCheck(targetTester, readerItem, newDb);

                // Move on to next
                hasEntries = db.next(readerItem, NullWritable.get());
            }

            targetTester.close();
        }

        /**
         * Copies an existing db entry to the new db.  A follow-up ADD_LINK
         * is recorded in futureEdits only when the entry's outlink flag
         * actually changed, to minimize unnecessary mod-writes.  An entry
         * whose target is reported LINK_INVALID is not copied; a
         * DEL_SINGLE_LINK is recorded for it instead.
         */
        private void passAlongWithOutlinkCheck(TargetTester targetTester, Link entry, MapFile.Writer newDb) throws IOException {
            int linkTest = targetTester.hasOutlinks(entry.getURL());

            if (linkTest == LINK_INVALID) {
                liwriter.appendInstructionInfo(futureEdits, entry, DEL_SINGLE_LINK, NullWritable.get());
                targetOutlinkEdits++;
            } else {
                boolean oldOutlinkStatus = entry.targetHasOutlink();
                boolean newOutlinkStatus = (linkTest == HAS_OUTLINKS);
                if (oldOutlinkStatus != newOutlinkStatus) {
                    entry.setTargetHasOutlink(newOutlinkStatus);
                    liwriter.appendInstructionInfo(futureEdits, entry, ADD_LINK, NullWritable.get());
                    targetOutlinkEdits++;
                }
                newDb.append(entry, NullWritable.get());
                itemsWritten++;
            }
        }

        /**
         * Writes a link to the new db with its outlink flag refreshed, and
         * always records a follow-up instruction in futureEdits: ADD_LINK
         * when the link is written, DEL_SINGLE_LINK when its target is
         * reported LINK_INVALID (in which case the link is not written).
         */
        private void writeWithFreshOutlinkFlag(TargetTester targetTester, Link link, MapFile.Writer newDb) throws IOException {
            int linkTest = targetTester.hasOutlinks(link.getURL());

            if (linkTest == LINK_INVALID) {
                liwriter.appendInstructionInfo(futureEdits, link, DEL_SINGLE_LINK, NullWritable.get());
            } else {
                link.setTargetHasOutlink(linkTest == HAS_OUTLINKS);
                liwriter.appendInstructionInfo(futureEdits, link, ADD_LINK, NullWritable.get());
                newDb.append(link, NullWritable.get());
                itemsWritten++;
            }
            targetOutlinkEdits++;
        }
    }

    /**
     * Method useful for the first time we create a distributed db project.
     * Basically need to write down the number of dirs we can expect.
     */
    public static void createDB(NutchFileSystem nfs, File root, int totalMachines) throws IOException {
        //
        // Check to see if the db already exists
        //
        File stdDir = new File(root, "standard");
        File machineInfo = new Fi

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
欧美视频一二三区| 成人性生交大片免费看在线播放 | 色综合色综合色综合| 日日摸夜夜添夜夜添亚洲女人| 国产视频一区在线观看| 欧美日韩精品久久久| 99re热这里只有精品视频| 韩国欧美国产1区| 午夜久久久久久久久| 久久久五月婷婷| 色哟哟在线观看一区二区三区| 国产精品一区二区久激情瑜伽| 午夜视频一区在线观看| 最新久久zyz资源站| 久久日韩精品一区二区五区| 欧美日韩国产精选| 色综合激情久久| 不卡一区二区中文字幕| 国产九色sp调教91| 久久国产欧美日韩精品| 免费日韩伦理电影| 午夜私人影院久久久久| 欧美极品xxx| 亚洲视频一区在线观看| 久久精品欧美一区二区三区麻豆| 日韩三级精品电影久久久| 欧美私人免费视频| 日韩免费电影一区| 欧美一激情一区二区三区| 欧美日韩久久一区| 欧美丰满美乳xxx高潮www| 欧美视频在线一区| 在线播放视频一区| 色噜噜偷拍精品综合在线| 91精品国产综合久久蜜臀| 欧美一级生活片| 日韩你懂的在线观看| 欧美一区二区三区婷婷月色| 欧美一级在线视频| 久久久精品国产免费观看同学| 日韩三级视频在线看| 免费成人在线播放| 成人激情开心网| 久久99精品久久久久| 精品一区二区国语对白| 精品一区二区三区蜜桃| 日日摸夜夜添夜夜添精品视频| 亚洲色图清纯唯美| 亚洲激情自拍偷拍| 欧美sm美女调教| 欧美日韩国产一级片| 欧美日韩成人在线| 精品对白一区国产伦| 国产精品久久久久久久裸模| 国产精品卡一卡二卡三| 亚洲福利视频导航| 国产精品一区二区视频| 色狠狠桃花综合| 日韩精品一区在线观看| 成人免费一区二区三区视频| 亚洲成在线观看| 国产精品66部| 欧美老肥妇做.爰bbww视频| 2022国产精品视频| 亚洲精品国久久99热| 老司机精品视频在线| 成人激情文学综合网| 91片在线免费观看| 日本精品一区二区三区高清| 欧美色网站导航| 亚洲国产精品v| 热久久国产精品| 91日韩精品一区| 精品欧美黑人一区二区三区| 亚洲精品乱码久久久久久久久 | 奇米四色…亚洲| 成人免费的视频| 精品国产三级电影在线观看| 亚洲免费视频中文字幕| 国产精品久久久久久久久免费樱桃| 国内外成人在线| 欧美一区二区精美| 亚洲嫩草精品久久| 国产福利精品导航| 欧美理论片在线| 亚洲制服丝袜一区| 国产美女精品在线| 91丨porny丨在线| 国产欧美一区二区精品性| 日韩一区欧美二区| 99久久伊人久久99| 国产日韩欧美精品在线| 欧美国产禁国产网站cc| 美国精品在线观看| 777午夜精品免费视频| 亚洲成人动漫在线观看| 91行情网站电视在线观看高清版| 国产精品的网站| 粉嫩av一区二区三区在线播放 | 久久日韩精品一区二区五区| 精品乱码亚洲一区二区不卡| 亚洲国产一区二区a毛片| 亚洲黄色小说网站| 久久激情五月婷婷| 99国产欧美另类久久久精品| 国产精品国产三级国产aⅴ入口| 国产激情一区二区三区| 国产日韩成人精品| 国产乱码精品一区二区三区五月婷| 欧美一级片在线看| 日产欧产美韩系列久久99| 欧美日韩成人一区| 蜜臀av一区二区在线免费观看| 亚洲国产欧美日韩另类综合| 91麻豆精品一区二区三区| 亚洲激情图片一区| 色婷婷av一区二区三区软件 | 在线观看精品一区| 亚洲欧洲日韩一区二区三区| 成人18精品视频| 日本在线不卡一区| 亚洲国产精品av| 制服丝袜亚洲网站| 另类小说一区二区三区| 一区二区视频免费在线观看| 日韩女优电影在线观看| 91亚洲国产成人精品一区二三| 日本怡春院一区二区| 国产麻豆91精品| 日本视频一区二区三区| 亚洲已满18点击进入久久| 久久夜色精品一区| 日韩欧美你懂的| 欧美视频在线观看一区二区| 99免费精品视频| 国产精品综合一区二区三区| 美女一区二区视频| 亚洲精品国产a| 国产精品亲子伦对白| 欧美电影免费观看高清完整版在线观看| 国产99精品国产| 开心九九激情九九欧美日韩精美视频电影 | 国产.欧美.日韩| 日本强好片久久久久久aaa| 亚洲蜜臀av乱码久久精品蜜桃| 久久人人爽爽爽人久久久| 中文字幕人成不卡一区| 亚洲一区二区三区爽爽爽爽爽 | 欧美成人精品1314www| 亚洲日本在线看| 色先锋aa成人| 国内不卡的二区三区中文字幕 | caoporen国产精品视频| 蜜臀a∨国产成人精品| 成人美女在线视频| 国产视频一区在线播放| 亚洲综合自拍偷拍| 欧美一级高清片| 亚洲成人av在线电影| 精品毛片乱码1区2区3区| 国产高清亚洲一区| 亚洲欧洲在线观看av| 一区二区久久久久久| 欧美一三区三区四区免费在线看| 亚洲国产cao| 精品噜噜噜噜久久久久久久久试看| 
av一区二区不卡| 欧美一区二区福利视频| 国产精品毛片久久久久久| 成人av电影在线| 国产一区不卡在线| 欧美一级精品在线| 欧美日韩国产中文| 国产精品一区三区| 亚洲免费观看高清完整版在线观看熊| 岛国一区二区三区| 日韩av不卡在线观看| 亚洲三级小视频| 国产精品美女一区二区三区| 欧美精品日日鲁夜夜添| 亚洲日本电影在线| 亚洲精品一卡二卡| 久久亚洲精品小早川怜子| 另类人妖一区二区av| 懂色av一区二区三区免费看| 日韩av一级电影| 日韩精品一区国产麻豆| 91无套直看片红桃| 粉嫩绯色av一区二区在线观看| 亚洲日本护士毛茸茸| 日韩成人免费看| 久久99精品久久久| 日本欧美在线看| 激情综合色丁香一区二区| 中文一区一区三区高中清不卡| 久久久久99精品一区| 日韩视频在线你懂得| 精久久久久久久久久久| 亚洲欧美日韩久久精品| 肉肉av福利一精品导航|