亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? segmentreader.java

?? 爬蟲數據的改進,并修正了一些bug
?? JAVA
?? 第 1 頁 / 共 2 頁
字號:
package net.nutch.segment;

import java.io.*;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Vector;
//import java.util.logging.Logger;
import org.apache.log4j.*;

import net.nutch.fetcher.FetcherOutput;
import net.nutch.io.ArrayFile;
import net.nutch.io.LongWritable;
import net.nutch.io.MapFile;
import net.nutch.fs.*;
import net.nutch.parse.ParseData;
import net.nutch.parse.ParseText;
import net.nutch.protocol.Content;

/**
 * This class holds together all data readers for an existing segment.
 * Some convenience methods are also provided, to read from the segment and
 * to reposition the current pointer.
 * 
 * @author Andrzej Bialecki <ab@getopt.org>
 */
public class SegmentReader {
  /** Shared log4j logger, registered under the name "segment". */
  public static final Logger LOG = Logger.getLogger("segment");

  /** Reader for the segment's ParseText data; stays null unless init() was asked to open it. */
  public ArrayFile.Reader parseTextReader;
  /** Reader for the segment's ParseData data; stays null unless init() was asked to open it. */
  public ArrayFile.Reader parseDataReader;

  // Disabled field: the time when fetching of this segment started, as recorded
  // in fetcher output data.
  //public long started = 0L;
  // Disabled field: the time when fetching of this segment finished, as recorded
  // in fetcher output data.
  //public long finished = 0L;
  /** Entry count computed by init() while the segment is being opened. */
  public long size = 0L;
  /** NOTE(review): presumably the current read position; its users are outside the visible code - confirm. */
  private long key = -1L;

  
  /** Segment directory, as handed to init(). */
  public File segmentDir;
  /** File system used to access the segment, as handed to init(). */
  public NutchFileSystem nfs;

  /**
   * Opens a segment through a new {@link LocalFileSystem}, requesting both
   * ParseText and ParseData, with automatic repair switched off.
   * @param dir directory containing segment data
   * @throws Exception if the segment cannot be opened
   */
  public SegmentReader(File dir) throws Exception {
    this(new LocalFileSystem(), dir,
         true,    // withParseText
         true,    // withParseData
         false);  // autoFix
  }
  
  /**
   * Opens a segment on the given file system, requesting both ParseText and
   * ParseData, with automatic repair switched off.
   * @param nfs filesystem
   * @param dir directory containing segment data
   * @throws Exception if the segment cannot be opened
   */
  public SegmentReader(NutchFileSystem nfs, File dir) throws Exception {
    this(nfs, dir,
         true,    // withParseText
         true,    // withParseData
         false);  // autoFix
  }
  
  /**
   * Opens a segment through a new {@link LocalFileSystem}, requesting both
   * ParseText and ParseData.
   * @param dir directory containing segment data
   * @param autoFix when true and opening fails, a repair is attempted and the
   * segment is opened once more; when false, or when the repair does not
   * succeed, an Exception is thrown.
   * @throws Exception if the segment cannot be opened
   */
  public SegmentReader(File dir, boolean autoFix) throws Exception {
    this(new LocalFileSystem(), dir,
         true,    // withParseText
         true,    // withParseData
         autoFix);
  }
  
  /**
   * Opens a segment on the given file system, requesting both ParseText and
   * ParseData.
   * @param nfs filesystem
   * @param dir directory containing segment data
   * @param autoFix when true and opening fails, a repair is attempted and the
   * segment is opened once more; when false, or when the repair does not
   * succeed, an Exception is thrown.
   * @throws Exception if the segment cannot be opened
   */
  public SegmentReader(NutchFileSystem nfs, File dir, boolean autoFix) throws Exception {
    this(nfs, dir,
         true,    // withParseText
         true,    // withParseData
         autoFix);
  }
  
  /**
   * Open a segment for reading. When a segment is open, its total size is checked
   * and cached in this class - however, only by actually reading entries one can
   * be sure about the exact number of valid, non-corrupt entries.
   *
   * <p>Opening is delegated to init(). If that fails with an Exception and
   * autoFix is set, {@link #fixSegment(NutchFileSystem, File, boolean, boolean, boolean)}
   * is run (not as a dry run) and init() is attempted exactly once more.</p>
   *
   * <p>NOTE(review): earlier documentation stated that for segments created with
   * the no-parse option (see {@link FetcherOutput#DIR_NAME_NP}) withParseText and
   * withParseData are forced to false. In the visible code only fixSegment applies
   * that rule; confirm whether opening a segment is meant to apply it as well.</p>
   *
   * @param nfs NutchFileSystem to use
   * @param dir directory containing segment data
   * @param withParseText if true, read ParseText, otherwise ignore it
   * @param withParseData if true, read ParseData, otherwise ignore it
   * @param autoFix if true, and the segment is corrupt, try to automatically fix it.
   * If this parameter is false, and the segment is corrupt, or fixing was unsuccessful,
   * an Exception is thrown.
   * @throws Exception if the segment could not be opened; when thrown because of
   * corruption, the original failure is attached as the cause
   */
  public SegmentReader(NutchFileSystem nfs, File dir,
          boolean withParseText, boolean withParseData,
          boolean autoFix) throws Exception {
    try {
      init(nfs, dir, withParseText, withParseData);
    } catch (Exception e) {
      // Corrupt segment: a repair is only attempted when the caller asked for it.
      boolean repaired = autoFix
              && fixSegment(nfs, dir, withParseText, withParseData, false);
      if (!repaired) {
        // Keep the original failure as the cause so the real reason is not lost.
        throw new Exception("Segment " + dir + " is corrupted.", e);
      }
      // Second and last attempt; a failure here propagates unchanged.
      init(nfs, dir, withParseText, withParseData);
    }
  }

  /**
   * Determine whether a segment was created with or without the no-parse option,
   * by checking which fetcher output directory is present in it.
   * The {@link FetcherOutput#DIR_NAME} directory is checked first.
   *
   * @param nfs filesystem
   * @param segdir segment directory
   * @return true if the {@link FetcherOutput#DIR_NAME} directory exists, false if
   * the {@link FetcherOutput#DIR_NAME_NP} directory exists instead
   * @throws Exception if neither directory exists (or neither is a directory)
   */
  public static boolean isParsedSegment(NutchFileSystem nfs, File segdir) throws Exception {
    File parsedDir = new File(segdir, FetcherOutput.DIR_NAME);
    if (nfs.exists(parsedDir) && nfs.isDirectory(parsedDir)) return true;
    File noParseDir = new File(segdir, FetcherOutput.DIR_NAME_NP);
    if (nfs.exists(noParseDir) && nfs.isDirectory(noParseDir)) return false;
    throw new Exception("Missing or invalid '" + FetcherOutput.DIR_NAME + "' or '"
            + FetcherOutput.DIR_NAME_NP + "' directory in " + segdir);
  }
  
  /**
   * Attempt to fix a partially corrupted segment. Currently this means just
   * fixing broken MapFile's, using {@link MapFile#fix(NutchFileSystem, File, Class, Class, boolean)}
   * method. The fetcher output is always processed; for a segment created with the
   * no-parse option both withParseText and withParseData are treated as false.
   *
   * <p>Any Throwable raised along the way is logged as a warning and reported
   * through the return value, it is never propagated to the caller.</p>
   *
   * @param nfs filesystem
   * @param dir segment directory
   * @param withParseText if true, fix parse_text, otherwise ignore it
   * @param withParseData if true, fix parse_data, otherwise ignore it
   * @param dryrun if true, only show what would be done without performing any actions
   * @return true if all requested parts were processed without an error, false otherwise
   */
  public static boolean fixSegment(NutchFileSystem nfs, File dir, 
          boolean withParseText, boolean withParseData,
          boolean dryrun) {
    String dr = dryrun ? "[DRY RUN] " : "";
    File parseData = new File(dir, ParseData.DIR_NAME);
    File parseText = new File(dir, ParseText.DIR_NAME);
    try {
      File fetcherOutput;
      if (isParsedSegment(nfs, dir)) {
        fetcherOutput = new File(dir, FetcherOutput.DIR_NAME);
      } else {
        // no-parse segment: there is no parse data to repair
        fetcherOutput = new File(dir, FetcherOutput.DIR_NAME_NP);
        withParseText = false;
        withParseData = false;
      }
      // NOTE(review): -1 presumably means that nothing had to be fixed - confirm against MapFile.fix.
      long cnt = MapFile.fix(nfs, fetcherOutput, LongWritable.class, FetcherOutput.class, dryrun);
      if (cnt != -1) LOG.info(dr + " - fixed " + fetcherOutput.getName());

      if (withParseData) {
        cnt = MapFile.fix(nfs, parseData, LongWritable.class, ParseData.class, dryrun);
        if (cnt != -1) LOG.info(dr + " - fixed " + parseData.getName());
      }
      if (withParseText) {
        cnt = MapFile.fix(nfs, parseText, LongWritable.class, ParseText.class, dryrun);
        if (cnt != -1) LOG.info(dr + " - fixed " + parseText.getName());
      }
      LOG.info(dr + "Finished fixing " + dir.getName());
      return true;
    } catch (Throwable t) {
      // Deliberately best-effort; the throwable is handed to the logger so the stack trace is kept.
      LOG.warn(dr + "Unable to fix segment " + dir.getName() + ": " + t.getMessage(), t);
      return false;
    }
  }

  /**
   * Remember the segment location, open the requested readers and count the
   * entries of the segment into {@link #size}.
   *
   * <p>Entries are counted on the ParseText reader when it was requested,
   * otherwise on the ParseData reader. When neither was requested nothing is
   * counted and size stays 0. The reader used for counting is reset afterwards.</p>
   *
   * @param nfs filesystem
   * @param dir directory containing segment data
   * @param withParseText if true, open the ParseText reader
   * @param withParseData if true, open the ParseData reader
   * @throws Exception if a reader cannot be opened or reset
   */
  private void init(NutchFileSystem nfs, File dir,
          boolean withParseText, boolean withParseData) throws Exception {
    segmentDir = dir;
    this.nfs = nfs;
    // init() runs a second time after a repair attempt: never continue from a stale count.
    size = 0L;

    if (withParseText) parseTextReader = new ArrayFile.Reader(nfs, new File(dir, ParseText.DIR_NAME).toString());
    if (withParseData) parseDataReader = new ArrayFile.Reader(nfs, new File(dir, ParseData.DIR_NAME).toString());

    // Pick a reader that was really opened; dereferencing parseTextReader
    // unconditionally fails with a NullPointerException when ParseText is not requested.
    ArrayFile.Reader counter = null;
    if (withParseText) {
      counter = parseTextReader;
    } else if (withParseData) {
      counter = parseDataReader;
    }
    if (counter == null) return;

    // count the number of valid entries.
    // XXX We assume that all other data files contain the
    // XXX same number of valid entries - which is not always
    // XXX true if Fetcher crashed in the middle of update.
    // XXX One should check for this later, when actually
    // XXX reading the entries.
    // NOTE(review): probing starts at key 1, so an empty file and a file with a
    // single entry presumably both end up with size == 1 - confirm against ArrayFile.
    LongWritable w = new LongWritable();
    w.set(++size);
    try {
      while (counter.seek(w)) {
        w.set(++size);
      }
    } catch (Throwable eof) {
      // the file is truncated - probably due to a crashed fetcher.
      // Use just the part that we can...
      LOG.warn(" - data in segment " + dir + " is corrupt, using only " + size + " entries.");
    }
    counter.reset();
  }

  /**
   * Get a specified entry from the segment. Note: even if some of the storage objects
   * are null, but if respective readers are open a seek(n) operation will be performed
   * anyway, to ensure that the whole entry is valid.
   * 
   * @param n position of the entry
   * @param fo storage for FetcherOutput data. Must not be null.
   * @param co storage for Content data, or null.
   * @param pt storage for ParseText data, or null.
   * @param pd storage for ParseData data, or null.
   * @return true if all requested data successfully read, false otherwise
   * @throws IOException
   */
  public synchronized boolean get(long n, ParseText pt, ParseData pd) throws IOException {
    //XXX a trivial implementation would be to do the following:
    //XXX   seek(n);
    //XXX   return next(fo, co, pt, pd);
    //XXX However, get(long, Writable) may be more optimized
    boolean valid = true;

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
国产一区二区在线观看免费| 在线播放欧美女士性生活| 91蝌蚪porny| 91精品欧美综合在线观看最新| 久久久久久电影| 日韩中文字幕亚洲一区二区va在线| 国产成人夜色高潮福利影视| 91精品国产色综合久久不卡蜜臀 | 成人黄色在线网站| 欧美日韩在线观看一区二区| 国产精品久久久久久久浪潮网站| 日韩国产欧美在线播放| 91久久精品午夜一区二区| 国产免费久久精品| 精品在线你懂的| 欧美精品tushy高清| 樱花草国产18久久久久| 成人av电影在线| 国产欧美日韩三区| 国产成人综合在线| 精品粉嫩超白一线天av| 三级久久三级久久久| 精品视频1区2区| 亚洲一级在线观看| 在线看国产一区二区| 一区二区三区影院| 色8久久精品久久久久久蜜| 国产亚洲精久久久久久| 国产乱码精品一区二区三区忘忧草| 欧美一区二视频| 丝袜脚交一区二区| 欧美一区二区三区在线观看 | 亚洲视频狠狠干| 欧美综合在线视频| 亚洲欧美在线视频| 91小宝寻花一区二区三区| 国产精品美女久久久久久久久 | 亚洲妇女屁股眼交7| 欧美亚洲国产bt| 三级影片在线观看欧美日韩一区二区| 欧美亚洲尤物久久| 日韩中文字幕麻豆| 欧美一区二区人人喊爽| 美女脱光内衣内裤视频久久网站| 欧美电视剧免费全集观看| 久久爱www久久做| 国产欧美视频一区二区| jvid福利写真一区二区三区| 亚洲男人天堂av| 欧美日韩免费不卡视频一区二区三区| 亚洲r级在线视频| 欧美二区三区的天堂| 黄色成人免费在线| 国产精品久久久久久久久晋中 | 久久久精品中文字幕麻豆发布| 黄色精品一二区| 国产精品白丝在线| 欧美偷拍一区二区| 久久av中文字幕片| 国产精品人妖ts系列视频| 在线观看日产精品| 国产一区二区在线观看免费| 亚洲三级在线看| 欧美一级精品在线| 成人精品一区二区三区中文字幕| 夜夜嗨av一区二区三区中文字幕 | 久久精品久久综合| 国产精品久久久99| 777欧美精品| 国产成人精品免费网站| 一级日本不卡的影视| 日韩一区二区高清| 91丨九色丨蝌蚪富婆spa| 免费在线观看一区二区三区| 国产精品国产三级国产有无不卡| 欧美日韩中文另类| 成人免费av在线| 另类小说综合欧美亚洲| 亚洲欧美另类综合偷拍| 精品不卡在线视频| 欧美亚洲国产bt| 成人高清在线视频| 国产麻豆91精品| 日韩影院精彩在线| 亚洲少妇30p| 国产欧美一区二区三区鸳鸯浴 | 久久爱www久久做| 亚洲一区二区三区四区在线| 久久在线观看免费| 欧美日韩国产区一| 一本久久a久久精品亚洲| 国产精品一区二区久久不卡| 午夜久久福利影院| 夜夜嗨av一区二区三区四季av| 久久久久久久精| 欧美成人精品1314www| 欧美二区乱c少妇| 欧美在线观看视频一区二区| 成人激情视频网站| 成人黄色大片在线观看| 国产综合久久久久久久久久久久| 午夜婷婷国产麻豆精品| 亚洲午夜国产一区99re久久| 国产精品成人一区二区三区夜夜夜| 久久先锋影音av| 精品精品国产高清一毛片一天堂| 91麻豆精品国产91| 91麻豆精品91久久久久同性| 欧美伦理影视网| 7777精品久久久大香线蕉 | 天堂蜜桃一区二区三区| 亚洲午夜免费福利视频| 亚洲日本乱码在线观看| 亚洲欧美另类小说| 一区二区三区av电影| 亚洲日穴在线视频| 国产成都精品91一区二区三| 丰满岳乱妇一区二区三区| 精品处破学生在线二十三| 91麻豆精品国产综合久久久久久 | 91精品国产综合久久久蜜臀粉嫩| 97aⅴ精品视频一二三区| eeuss国产一区二区三区| 波波电影院一区二区三区| 成人av在线一区二区三区| 成人黄色国产精品网站大全在线免费观看| 风间由美中文字幕在线看视频国产欧美| 国产精品白丝av| 成人h精品动漫一区二区三区| 高清成人在线观看| 91色.com| 欧美视频一区在线| 欧美岛国在线观看| 欧美激情一区二区三区蜜桃视频 | 人人超碰91尤物精品国产| 蜜桃视频一区二区三区| 国产成a人亚洲精| 91老司机福利 在线| 欧美日韩国产三级| 久久美女艺术照精彩视频福利播放| 国产日韩影视精品| 亚洲在线免费播放| 精品亚洲成a人在线观看 | 国产麻豆精品一区二区| heyzo一本久久综合| 精品视频在线免费看| 精品日韩欧美在线| 一区二区中文视频| 三级不卡在线观看| 懂色av噜噜一区二区三区av| 欧美在线视频日韩| 精品捆绑美女sm三区| 日韩理论片在线| 美脚の诱脚舐め脚责91| 99综合电影在线视频| 91精品国产一区二区人妖| 中文字幕不卡一区| 日本欧美大码aⅴ在线播放| 成人高清视频在线观看| 91精品久久久久久久99蜜桃| 国产丝袜在线精品| 
日本在线播放一区二区三区| 成人性视频免费网站| 制服丝袜一区二区三区| 亚洲品质自拍视频| 国产在线看一区| 欧美日韩中文字幕一区二区| 欧美激情资源网| 美腿丝袜亚洲三区| 欧美性大战久久| 国产精品国产馆在线真实露脸| 六月丁香综合在线视频| 91黄色免费网站| 国产精品久久久久久久久动漫 | 日韩激情av在线| 91一区二区在线| 亚洲国产精品精华液2区45| 日本一道高清亚洲日美韩| 色综合天天综合给合国产| www国产成人| 精品亚洲欧美一区| 欧美一区二区三区啪啪| 亚洲福中文字幕伊人影院| 99久久精品免费| 欧美精彩视频一区二区三区| 久久99精品国产.久久久久| 91麻豆精品国产91久久久久| 亚洲国产精品精华液网站| 99久久精品国产麻豆演员表| 国产精品嫩草99a| 懂色av中文一区二区三区 | 国产精品白丝在线| 国产高清久久久久| 日韩欧美国产综合| 日韩av一级片| 精品剧情v国产在线观看在线| 午夜激情久久久| 欧美日韩在线观看一区二区 | 成人国产电影网| 国产精品久久久久久久久免费桃花 |