亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? warcreader.java

?? 爬蟲
?? JAVA
字號:
/* $Id: WARCReader.java,v 1.5 2006/08/28 23:44:17 stack-sf Exp $
 *
 * Created Aug 23, 2006
 *
 * Copyright (C) 2006 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.io.warc;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveRecord;

/**
 * WARCReader.
 * Go via {@link WARCReaderFactory} to get instance.
 * @author stack
 * @version $Date: 2006/08/28 23:44:17 $ $Version$
 */
public class WARCReader extends ArchiveReader implements WARCConstants {
    /** Package-private: instances are obtained via {@link WARCReaderFactory}. */
    WARCReader() {
        super();
    }

    /**
     * Initializes the reader and stamps it with the WARC version constant.
     * @param i Identifier handed through to the superclass initializer.
     */
    @Override
    protected void initialize(String i) {
        super.initialize(i);
        setVersion(WARC_VERSION);
    }

    /**
     * Skip over any trailing new lines at end of the record so we're lined up
     * ready to read the next.
     * @param record Record that has just been read; must be fully consumed.
     * @throws IOException If the record still has bytes available, or the
     * stream does not continue with two CRLF pairs.
     */
    protected void gotoEOR(ArchiveRecord record) throws IOException {
        if (record.available() != 0) {
            throw new IOException("Record should be exhausted before coming " +
                "in here");
        }
        // Records end in 2*CRLF.  Suck it up.
        for (int pair = 0; pair < 2; pair++) {
            readExpectedChar(getIn(), CRLF.charAt(0));
            readExpectedChar(getIn(), CRLF.charAt(1));
        }
    }

    /**
     * Reads one byte from the stream and verifies it is the expected one.
     * @param is Stream to read from.
     * @param expected Byte value that must come next.
     * @throws java.io.EOFException If the stream ends before the expected
     * byte (a subclass of IOException, so existing handlers still apply).
     * @throws IOException If a different byte is read.
     */
    protected void readExpectedChar(final InputStream is, final int expected)
    throws IOException {
        int c = is.read();
        if (c != expected) {
            if (c == -1) {
                // Report truncation explicitly rather than as the
                // meaningless "character ffffffff".
                throw new java.io.EOFException("Unexpected end of stream " +
                    "(Expecting " + Integer.toHexString(expected) + ")");
            }
            throw new IOException("Unexpected character " +
                Integer.toHexString(c) + "(Expecting " +
                Integer.toHexString(expected) + ")");
        }
    }

    /**
     * Create new WARC record.
     * Encapsulate housekeeping that has to do w/ creating new Record.
     * @param is InputStream to use.
     * @param offset Absolute offset into WARC file.
     * @return A WARCRecord.
     * @throws IOException
     */
    protected WARCRecord createArchiveRecord(InputStream is, long offset)
    throws IOException {
        return (WARCRecord)currentRecord(new WARCRecord(is,
            getReaderIdentifier(), offset, isDigest(), isStrict()));
    }

    /**
     * Prints every record to stdout: the header's string form, then the
     * record dump, then a blank line.
     * @param compress Not consulted by this implementation.
     * NOTE(review): confirm whether compressed output was intended here.
     * @throws IOException
     * @throws java.text.ParseException
     */
    @Override
    public void dump(boolean compress)
    throws IOException, java.text.ParseException {
        for (final Iterator<ArchiveRecord> i = iterator(); i.hasNext();) {
            ArchiveRecord r = i.next();
            System.out.println(r.getHeader().toString());
            r.dump();
            System.out.println();
        }
    }

    @Override
    public String getDotFileExtension() {
        return DOT_WARC_FILE_EXTENSION;
    }

    @Override
    public String getFileExtension() {
        return WARC_FILE_EXTENSION;
    }

    // Static methods follow.  Mostly for command-line processing.

    /**
     * Prints command-line help and terminates the JVM.
     *
     * @param formatter Help formatter instance.
     * @param options Usage options.
     * @param exitCode Exit code.
     */
    private static void usage(HelpFormatter formatter, Options options,
            int exitCode) {
        // Only options that main() actually registers are advertised here.
        formatter.printHelp("java org.archive.io.warc.WARCReader" +
            " [--digest=true|false] \\\n" +
            " [--format=cdx|cdxfile|dump|gzipdump]" +
            " [--offset=#] \\\n[--strict] WARC_FILE|WARC_URL",
                options);
        System.exit(exitCode);
    }

    /**
     * Write out the WARC file.
     *
     * @param reader Reader to output.
     * @param format Format to use outputting.
     * @throws IOException If the reader does not support the format.
     * @throws java.text.ParseException
     */
    protected static void output(WARCReader reader, String format)
    throws IOException, java.text.ParseException {
        if (!reader.output(format)) {
            throw new IOException("Unsupported format: " + format);
        }
    }

    /**
     * Output passed record using passed format specifier.
     * @param r WARCReader instance to output.
     * @param format What format to use outputting.
     * @throws IOException If the format is unsupported for a single record.
     */
    protected static void outputRecord(final WARCReader r,
        final String format)
    throws IOException {
        if (!r.outputRecord(format)) {
            throw new IOException("Unsupported format" +
                " (or unsupported on a single record): " + format);
        }
    }

    /**
     * Generate a CDX index file for a WARC file.
     *
     * @param urlOrPath The WARC file to generate a CDX index for
     * @throws IOException
     * @throws java.text.ParseException
     */
    public static void createCDXIndexFile(String urlOrPath)
    throws IOException, java.text.ParseException {
        WARCReader r = WARCReaderFactory.get(urlOrPath);
        r.setStrict(false);
        r.setDigest(true);
        output(r, CDX_FILE);
    }

    /**
     * Tells whether the given (already lower-cased) format name is one the
     * command line accepts.
     */
    private static boolean isSupportedFormat(final String format) {
        final String [] supportedFormats =
            {CDX, DUMP, GZIP_DUMP, CDX_FILE};
        for (int i = 0; i < supportedFormats.length; i++) {
            if (supportedFormats[i].equals(format)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Command-line interface to WARCReader.
     *
     * Here is the command-line interface:
     * <pre>
     * usage: java org.archive.io.warc.WARCReader [--offset=#] WARCFILE
     *  -h,--help      Prints this message and exits.
     *  -o,--offset    Outputs record at this offset into arc file.</pre>
     *
     * <p>Outputs using a pseudo-CDX format as described here:
     * <a href="http://www.archive.org/web/researcher/cdx_legend.php">CDX
     * Legend</a> and here
     * <a href="http://www.archive.org/web/researcher/example_cdx.php">Example</a>.
     * Legend used in below is: 'CDX b e a m s c V (or v if uncompressed) n g'.
     * Hash is hard-coded straight SHA-1 hash of content.
     *
     * @param args Command-line arguments.
     * @throws ParseException Failed parse of the command line.
     * @throws IOException
     * @throws java.text.ParseException
     */
    public static void main(String [] args)
    throws ParseException, IOException, java.text.ParseException {
        Options options = new Options();
        options.addOption(new Option("h","help", false,
            "Prints this message and exits."));
        options.addOption(new Option("o","offset", true,
            "Outputs record at this offset into arc file."));
        // NOTE(review): this help text says the default is true, but the
        // local 'digest' below starts out false. Confirm which is intended.
        options.addOption(new Option("d","digest", true,
            "Pass true|false. Expensive. Default: true (SHA-1)."));
        options.addOption(new Option("s","strict", false,
            "Strict mode. Fails parse if incorrectly formatted WARC."));
        // Lists exactly the formats accepted by isSupportedFormat().
        options.addOption(new Option("f","format", true,
            "Output options: 'cdx', 'cdxfile', 'dump' or 'gzipdump'." +
            " Default: 'cdx'."));
        PosixParser parser = new PosixParser();
        CommandLine cmdline = parser.parse(options, args, false);
        List<?> cmdlineArgs = cmdline.getArgList();
        Option [] cmdlineOptions = cmdline.getOptions();
        HelpFormatter formatter = new HelpFormatter();

        // If no args, print help.
        if (cmdlineArgs.size() <= 0) {
            usage(formatter, options, 0);
        }

        // Now look at options passed.
        long offset = -1;
        boolean digest = false;
        boolean strict = false;
        String format = CDX;
        for (int i = 0; i < cmdlineOptions.length; i++) {
            switch(cmdlineOptions[i].getId()) {
                case 'h':
                    usage(formatter, options, 0);
                    break;

                case 'o':
                    offset =
                        Long.parseLong(cmdlineOptions[i].getValue());
                    break;

                case 's':
                    strict = true;
                    break;

                case 'd':
                    digest = getTrueOrFalse(cmdlineOptions[i].getValue());
                    break;

                case 'f':
                    // Fixed locale so the match against the ASCII format
                    // names does not depend on the default locale.
                    format = cmdlineOptions[i].getValue()
                        .toLowerCase(java.util.Locale.ENGLISH);
                    if (!isSupportedFormat(format)) {
                        usage(formatter, options, 1);
                    }
                    break;

                default:
                    // Show the option letter rather than its numeric code.
                    throw new RuntimeException("Unexpected option: " +
                        (char)cmdlineOptions[i].getId());
            }
        }

        if (offset >= 0) {
            if (cmdlineArgs.size() != 1) {
                System.out.println("Error: Pass one arcfile only.");
                usage(formatter, options, 1);
            }
            // NOTE(review): --digest is not applied on this single-record
            // path; confirm whether that is intentional.
            WARCReader r = WARCReaderFactory.get(
                new File((String)cmdlineArgs.get(0)), offset);
            r.setStrict(strict);
            outputRecord(r, format);
        } else {
            for (Object arg : cmdlineArgs) {
                String urlOrPath = (String)arg;
                try {
                    WARCReader r = WARCReaderFactory.get(urlOrPath);
                    r.setStrict(strict);
                    r.setDigest(digest);
                    output(r, format);
                } catch (RuntimeException e) {
                    // Write out name of file we failed on to help with
                    // debugging, print the stack trace, then exit with a
                    // failure status: remaining files are not processed.
                    System.err.println("Exception processing " + urlOrPath +
                        ": " + e.getMessage());
                    e.printStackTrace(System.err);
                    System.exit(1);
                }
            }
        }
    }
}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
日韩午夜精品电影| 久久久久久亚洲综合| 国内成人自拍视频| 一区二区三区在线观看视频| 2024国产精品视频| 欧美日韩精品系列| 91丨porny丨户外露出| 久久国产剧场电影| 亚洲综合在线免费观看| 久久综合久久99| 欧美电影一区二区| 91在线观看污| 国产一区二区0| 人人狠狠综合久久亚洲| 亚洲欧美一区二区三区极速播放| 欧美xxxxxxxx| 91精品国产美女浴室洗澡无遮挡| 一本色道a无线码一区v| 豆国产96在线|亚洲| 久久99精品久久久久久国产越南| 亚洲成av人综合在线观看| 亚洲视频狠狠干| 国产精品沙发午睡系列990531| 欧美一级高清片| 7777精品伊人久久久大香线蕉 | 91伊人久久大香线蕉| 国内精品久久久久影院一蜜桃| 日韩精品色哟哟| 五月婷婷激情综合网| 一区二区久久久| 亚洲欧洲综合另类| 亚洲男人的天堂av| 亚洲三级久久久| 国产精品成人网| 国产精品美女久久久久久久久| 久久午夜老司机| 精品国产123| 久久综合色综合88| 精品999在线播放| 亚洲精品一区二区三区香蕉| 精品入口麻豆88视频| 欧美成人vr18sexvr| 精品久久久久久久久久久久包黑料| 欧美一区二区精品久久911| 91精品在线观看入口| 欧美一级免费大片| 日韩女同互慰一区二区| 欧美v国产在线一区二区三区| 精品蜜桃在线看| 久久精品一区二区三区不卡| 国产欧美日韩另类视频免费观看| 日本一区二区电影| 国产精品欧美久久久久无广告| 国产午夜精品一区二区三区视频| 欧美激情一区不卡| 日韩久久一区二区| 亚洲与欧洲av电影| 日本最新不卡在线| 国产一区二区美女| 不卡一区中文字幕| 欧美在线免费观看亚洲| 91精品国产一区二区三区香蕉| 日韩欧美一二三区| 国产欧美一区二区精品仙草咪| 国产精品久久久久婷婷二区次| 亚洲精品中文在线影院| 亚洲成人资源网| 精品一区二区在线视频| 大白屁股一区二区视频| 色吊一区二区三区| 日韩免费观看2025年上映的电影| 久久无码av三级| 亚洲女与黑人做爰| 日韩电影在线一区| 国产白丝网站精品污在线入口| 色综合视频在线观看| 91精品久久久久久久91蜜桃| 久久色在线观看| 一区二区在线看| 久久99精品久久久久久动态图 | 精品国产一区二区三区不卡| 国产精品日韩精品欧美在线| 亚洲午夜精品一区二区三区他趣| 开心九九激情九九欧美日韩精美视频电影| 国产一区激情在线| 欧美在线一区二区| 久久久国产综合精品女国产盗摄| 亚洲伦在线观看| 激情深爱一区二区| 欧美在线不卡视频| 国产亚洲成av人在线观看导航| 一区二区三区四区在线免费观看| 久久国产精品72免费观看| 日本韩国欧美三级| 久久久www成人免费毛片麻豆 | 99久久精品国产一区二区三区| 91麻豆精品国产91久久久更新时间 | 国产麻豆视频一区二区| 色噜噜偷拍精品综合在线| 2019国产精品| 午夜婷婷国产麻豆精品| 大白屁股一区二区视频| 欧美v日韩v国产v| 午夜精品久久久久久久| 成人h精品动漫一区二区三区| 日韩欧美国产综合| 亚洲制服丝袜一区| 99久久99久久精品免费看蜜桃| 欧美成人精品1314www| 亚洲成人一区二区| 91丨九色丨蝌蚪富婆spa| 国产亚洲短视频| 蜜桃av一区二区三区| 欧美日韩国产另类一区| 中文字幕综合网| 国产91露脸合集magnet| 欧美电视剧在线看免费| 天天影视涩香欲综合网| 色婷婷激情综合| 中文字幕日韩av资源站| 国产精选一区二区三区| 欧美大片免费久久精品三p| 亚洲超碰97人人做人人爱| 一本久久精品一区二区 | 视频一区中文字幕国产| 日本韩国一区二区三区视频| 成人免费在线视频观看| 成人av在线电影| 国产免费观看久久| 国产成人亚洲精品狼色在线| 亚洲精品在线观看网站| 麻豆国产欧美日韩综合精品二区| 在线电影欧美成精品| 午夜欧美大尺度福利影院在线看| 欧美三区在线观看| 亚洲影视资源网| 欧美午夜精品一区| 亚洲综合激情另类小说区| 欧美在线视频你懂得| 亚洲国产欧美日韩另类综合| 欧美日韩一区二区三区四区| 亚洲一区二区综合| 欧美日韩三级在线| 午夜日韩在线电影| 欧美一级在线免费| 精品在线一区二区三区| 久久精品夜夜夜夜久久| 国产91综合网| 国产精品超碰97尤物18| 色88888久久久久久影院按摩| 亚洲综合在线免费观看| 欧美人体做爰大胆视频| 欧美aⅴ一区二区三区视频| 日韩精品中文字幕一区二区三区| 精品一区精品二区高清| 欧美国产日韩a欧美在线观看 | 国产福利一区在线观看| 国产精品毛片久久久久久久| 一本久道中文字幕精品亚洲嫩| 亚洲午夜电影在线观看| 91精品国产综合久久久久| 
韩国理伦片一区二区三区在线播放 | 香蕉成人伊视频在线观看| 欧美一区二区视频在线观看| 国内偷窥港台综合视频在线播放| 国产女人水真多18毛片18精品视频| av中文字幕一区| 一区二区理论电影在线观看| 91精品国产欧美一区二区| 国产乱码精品1区2区3区| 亚洲丝袜自拍清纯另类| 欧美日韩aaa| 国产露脸91国语对白| 亚洲欧美日韩电影| 欧美一区二区三区不卡| 成人午夜av在线| 午夜亚洲福利老司机| 久久亚洲春色中文字幕久久久| 色综合视频在线观看| 欧美aaaaaa午夜精品| 中文字幕在线一区| 欧美一区二区黄色| 99热在这里有精品免费| 日韩av中文字幕一区二区| 日本一区二区三区国色天香| 日本福利一区二区| 国产成人在线网站| 亚洲一区二区三区四区的| 久久精品夜夜夜夜久久| 欧美日韩久久久| 国产91精品在线观看| 午夜伦欧美伦电影理论片| 欧美激情中文不卡| 538在线一区二区精品国产| 成人av动漫网站| 美女网站色91| 亚洲国产精品一区二区www| 国产丝袜欧美中文另类| 欧美电影在哪看比较好| 色哟哟国产精品免费观看|