亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? arcrecord.java

?? 爬蟲
?? JAVA
字號:
/* ARCRecord
 *
 * $Id: ARCRecord.java,v 1.34 2006/08/28 23:44:17 stack-sf Exp $
 *
 * Created on Jan 7, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.io.arc;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpParser;
import org.apache.commons.httpclient.StatusLine;
import org.apache.commons.httpclient.util.EncodingUtil;
import org.archive.io.ArchiveRecord;
import org.archive.io.ArchiveRecordHeader;

/**
 * An ARC file record.
 *
 * <p>Does not encompass the ARCRecord metadata line, just the record content.
 * When HTTP header parsing is enabled, the header bytes are consumed from the
 * underlying stream at construction time and buffered; the {@code read}
 * methods replay those buffered bytes first and only then fall through to the
 * underlying stream.
 *
 * @author stack
 */
public class ARCRecord extends ArchiveRecord implements ARCConstants {
    /**
     * Http status line object.
     *
     * May be null if record is not http.
     */
    private StatusLine httpStatus = null;

    /**
     * Http header bytes.
     *
     * If non-null and bytes available, give out its contents before we
     * go back to the underlying stream.  Set back to null once drained.
     */
    private InputStream httpHeaderStream = null;

    /**
     * Http headers.
     *
     * Only populated after reading of headers.
     */
    private Header [] httpHeaders = null;

    /**
     * Minimal http header length.
     *
     * I've seen in arcs content length of 1 with no
     * header.
     */
    private static final long MIN_HTTP_HEADER_LENGTH =
        "HTTP/1.1 200 OK\r\n".length();

    /**
     * Constructor.
     *
     * Delegates to the full constructor with a body offset of 0, digesting
     * on, strict parsing off and HTTP header parsing on.
     *
     * @param in Stream cued up to be at the start of the record this instance
     * is to represent.
     * @param metaData Meta data.
     * @throws IOException
     */
    public ARCRecord(InputStream in, ArchiveRecordHeader metaData)
            throws IOException {
        this(in, metaData, 0, true, false, true);
    }

    /**
     * Constructor.
     *
     * @param in Stream cued up to be at the start of the record this instance
     * is to represent.
     * @param metaData Meta data.
     * @param bodyOffset Offset into the body.  Usually 0.
     * @param digest True if we're to calculate digest for this record.  Not
     * digesting saves about ~15% of cpu during an ARC parse.
     * @param strict Be strict parsing (Parsing stops if ARC improperly
     * formatted).
     * @param parseHttpHeaders True if we are to parse HTTP headers.  Costs
     * about ~20% of CPU during an ARC parse.
     * @throws IOException
     */
    public ARCRecord(InputStream in, ArchiveRecordHeader metaData,
            int bodyOffset, boolean digest, boolean strict,
            final boolean parseHttpHeaders)
            throws IOException {
        super(in, metaData, bodyOffset, digest, strict);
        if (parseHttpHeaders) {
            this.httpHeaderStream = readHttpHeader();
        }
    }

    /**
     * Skip over the http header if one present.
     *
     * Subsequent reads will get the body.
     *
     * <p>Calling this method in the midst of reading the header
     * will make for strange results.  Otherwise, safe to call
     * at any time though before reading any of the arc record
     * content is only time that it makes sense.
     *
     * <p>After calling this method, you can call
     * {@link #getHttpHeaders()} to get the read http header.
     *
     * @throws IOException
     */
    public void skipHttpHeader() throws IOException {
        // read(byte[], int, int) nulls out httpHeaderStream once it has been
        // drained, so the null check has to be repeated on every pass.
        while (this.httpHeaderStream != null) {
            int available = this.httpHeaderStream.available();
            if (available <= 0) {
                break;
            }
            // Expected to run once only: the buffer is sized to hold
            // everything that is still pending in the header stream.
            byte [] buffer = new byte[available];
            read(buffer, 0, available);
        }
    }

    /**
     * Read http header if present.
     *
     * Technique borrowed from HttpClient HttpParse class.  On success this
     * records the parsed status line and headers on this instance, and stores
     * the header length (content begin) and status code in the record
     * meta data.
     *
     * @return ByteArrayInputStream with the http header in it or null if no
     * http header.
     * @throws IOException If the status line or a header line is missing or
     * is not terminated by an end-of-line, or if the status line does not
     * start with HTTP.
     */
    private InputStream readHttpHeader() throws IOException {
        // If judged a record that doesn't have an http header, return
        // immediately.
        if (!getHeader().getUrl().startsWith("http") ||
                getHeader().getLength() <= MIN_HTTP_HEADER_LENGTH) {
            return null;
        }

        byte [] statusBytes = HttpParser.readRawLine(getIn());
        int eolCharCount = getEolCharsCount(statusBytes);
        if (eolCharCount <= 0) {
            // readRawLine hands back null when there is no input left; guard
            // so that we raise the intended IOException rather than an NPE
            // (same guard as the header-line loop below).
            throw new IOException("Failed to read http status where one " +
                " was expected: " +
                ((statusBytes != null)? new String(statusBytes): null));
        }
        String statusLine = EncodingUtil.getString(statusBytes, 0,
            statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING);
        if ((statusLine == null) ||
                !StatusLine.startsWithHTTP(statusLine)) {
            throw new IOException("Failed parse of http status line.");
        }
        this.httpStatus = new StatusLine(statusLine);

        // Save off all bytes read.  Keep them as bytes rather than
        // convert to strings so we don't have to worry about encodings
        // though this should never be a problem doing http headers since
        // its all supposed to be ascii.
        ByteArrayOutputStream baos =
            new ByteArrayOutputStream(statusBytes.length + 4 * 1024);
        baos.write(statusBytes);

        // Now read rest of the header lines looking for the separation
        // between header and body.
        while (true) {
            byte [] lineBytes = HttpParser.readRawLine(getIn());
            eolCharCount = getEolCharsCount(lineBytes);
            if (eolCharCount <= 0) {
                throw new IOException("Failed reading http headers: " +
                    ((lineBytes != null)? new String(lineBytes): null));
            }
            // Save the bytes read.
            baos.write(lineBytes);
            if ((lineBytes.length - eolCharCount) <= 0) {
                // A line made up of nothing but EOL characters: we've
                // finished reading the http header.
                break;
            }
        }

        byte [] headerBytes = baos.toByteArray();
        // Save off where body starts.
        this.getMetaData().setContentBegin(headerBytes.length);
        ByteArrayInputStream bais = new ByteArrayInputStream(headerBytes);
        if (!bais.markSupported()) {
            throw new IOException("ByteArrayInputStream does not support mark");
        }
        bais.mark(headerBytes.length);
        // Read the status line.  Don't let it into the parseHeaders function.
        // It doesn't know what to do with it.
        bais.read(statusBytes, 0, statusBytes.length);
        this.httpHeaders = HttpParser.parseHeaders(bais,
            ARCConstants.DEFAULT_ENCODING);
        this.getMetaData().setStatusCode(Integer.toString(getStatusCode()));
        // Rewind so that callers get the full header, status line included.
        bais.reset();
        return bais;
    }

    /**
     * Return status code for this record.
     *
     * This method will return -1 until the http header has been read.
     * @return Status code.
     */
    public int getStatusCode() {
        return (this.httpStatus == null)? -1: this.httpStatus.getStatusCode();
    }

    /**
     * @param bytes Array of bytes to examine for an EOL.
     * @return Count of end-of-line characters (1 for a trailing LF, 2 for a
     * trailing CRLF) or zero if none or if <code>bytes</code> is null.
     */
    private int getEolCharsCount(byte [] bytes) {
        int count = 0;
        if (bytes != null && bytes.length >= 1 &&
                bytes[bytes.length - 1] == '\n') {
            count++;
            if (bytes.length >= 2 && bytes[bytes.length - 2] == '\r') {
                count++;
            }
        }
        return count;
    }

    /**
     * @return Meta data for this record.
     */
    public ARCRecordMetaData getMetaData() {
        return (ARCRecordMetaData)getHeader();
    }

    /**
     * @return http headers (Only available after header has been read).
     */
    public Header [] getHttpHeaders() {
        return this.httpHeaders;
    }

    /**
     * @return Next character in this ARCRecord's content else -1 if at end of
     * this record.
     * @throws IOException
     */
    @Override
    public int read() throws IOException {
        if (this.httpHeaderStream == null ||
                this.httpHeaderStream.available() <= 0) {
            return super.read();
        }
        // If http header, return bytes from it before we go to underlying
        // stream.
        int c = this.httpHeaderStream.read();
        // If done with the header stream, null it out.
        if (this.httpHeaderStream.available() <= 0) {
            this.httpHeaderStream = null;
        }
        incrementPosition();
        return c;
    }

    /**
     * Read up to <code>length</code> bytes of this record's content.
     *
     * Buffered http header bytes, if any remain, are handed out first; a
     * single call never mixes header bytes with bytes from the underlying
     * stream.
     *
     * @param b Buffer to read into.
     * @param offset Offset in <code>b</code> at which to start storing bytes.
     * @param length Maximum count of bytes to read.
     * @return Count of bytes read.  While header bytes are pending, a
     * zero-length request returns 0; otherwise whatever the superclass read
     * returns.
     * @throws IOException
     */
    @Override
    public int read(byte [] b, int offset, int length) throws IOException {
        if (this.httpHeaderStream == null ||
                this.httpHeaderStream.available() <= 0) {
            return super.read(b, offset, length);
        }
        // If http header, return bytes from it before we go to underlying
        // stream.
        int toRead = Math.min(length, this.httpHeaderStream.available());
        if (toRead == 0) {
            // Header bytes are available, so this is a zero-length request.
            // Report zero bytes read rather than a bogus end-of-stream, and
            // leave the record position untouched.
            return 0;
        }
        int read = this.httpHeaderStream.read(b, offset, toRead);
        // If done with the header stream, null it out.
        if (this.httpHeaderStream.available() <= 0) {
            this.httpHeaderStream = null;
        }
        incrementPosition(read);
        return read;
    }

    /**
     * @return Offset at which the body begins (Only known after
     * header has been read) or -1 if none or if we haven't read
     * headers yet.  Usually length of HTTP headers (does not include ARC
     * metadata line length).
     */
    public int getBodyOffset() {
        return this.getMetaData().getContentBegin();
    }

    /**
     * @param h Record header to pull the IP from.
     * @return IP recorded in <code>h</code> when it is ARC meta data and the
     * IP is non-null, else whatever the superclass supplies.
     */
    @Override
    protected String getIp4Cdx(ArchiveRecordHeader h) {
        String result = null;
        if (h instanceof ARCRecordMetaData) {
            result = ((ARCRecordMetaData)h).getIp();
        }
        return (result != null)? result: super.getIp4Cdx(h);
    }

    /**
     * @param h Record header to pull the status code from.
     * @return Status code recorded in <code>h</code> when it is ARC meta data
     * and the code is non-null, else whatever the superclass supplies.
     */
    @Override
    protected String getStatusCode4Cdx(ArchiveRecordHeader h) {
        String result = null;
        if (h instanceof ARCRecordMetaData) {
            result = ((ARCRecordMetaData)h).getStatusCode();
        }
        return (result != null)? result: super.getStatusCode4Cdx(h);
    }

    /**
     * @param h Record header to pull the digest from.
     * @return Digest recorded in <code>h</code> when it is ARC meta data and
     * the digest is non-null, else whatever the superclass supplies.
     */
    @Override
    protected String getDigest4Cdx(ArchiveRecordHeader h) {
        String result = null;
        if (h instanceof ARCRecordMetaData) {
            result = ((ARCRecordMetaData)h).getDigest();
        }
        return (result != null)? result: super.getDigest4Cdx(h);
    }
}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
色拍拍在线精品视频8848| 国产99久久精品| 亚洲素人一区二区| 国产欧美一区二区精品婷婷| 51精品秘密在线观看| 91成人在线免费观看| 成人av在线网| 成人a级免费电影| 成人av高清在线| 国产成人免费视频网站| 国产伦精品一区二区三区免费| 美女爽到高潮91| 日本在线不卡视频| 欧美96一区二区免费视频| 青娱乐精品视频在线| 香蕉乱码成人久久天堂爱免费| 一区二区三区在线观看视频| 亚洲制服丝袜一区| 亚洲成人在线观看视频| 亚洲一区二区三区四区在线免费观看 | 国产69精品久久777的优势| 韩国av一区二区三区在线观看| 美国三级日本三级久久99| 久久 天天综合| 国产凹凸在线观看一区二区| 国产成人精品亚洲777人妖| 99精品国产99久久久久久白柏| 色94色欧美sute亚洲13| 欧美日韩免费视频| 欧美第一区第二区| 欧美国产成人在线| 亚洲第一狼人社区| 狠狠久久亚洲欧美| 91丨九色丨尤物| 欧美久久高跟鞋激| 久久久国际精品| 亚洲精品国产第一综合99久久| 亚洲图片一区二区| 国产精品影视在线| 欧美老肥妇做.爰bbww| 久久久久久日产精品| 亚洲欧美日韩国产另类专区| 天天操天天色综合| 成人教育av在线| 91精品国产麻豆国产自产在线 | 激情成人综合网| 色综合久久中文综合久久97| 欧美一区二区啪啪| 中文字幕一区二区三区视频| 视频一区二区三区入口| 成人午夜又粗又硬又大| 欧美一区二区日韩一区二区| 国产精品国产三级国产aⅴ入口 | 日韩欧美一区二区在线视频| 国产亚洲制服色| 日韩精品亚洲一区| 色婷婷狠狠综合| 久久久噜噜噜久噜久久综合| 日韩电影在线一区二区| 色婷婷亚洲综合| 欧美国产丝袜视频| 精品无人区卡一卡二卡三乱码免费卡| 色屁屁一区二区| 最新日韩在线视频| 高清久久久久久| 337p日本欧洲亚洲大胆色噜噜| 亚洲永久免费av| 91麻豆国产福利在线观看| 久久久久久久久久久久久久久99 | 久久久三级国产网站| 日韩精品乱码免费| 日本黄色一区二区| 亚洲欧洲三级电影| 成人av影视在线观看| 国产欧美日韩另类视频免费观看| 精品中文字幕一区二区小辣椒| 欧美日韩高清一区二区三区| 一二三区精品福利视频| 91香蕉视频污在线| 亚洲人成影院在线观看| 色综合久久中文字幕| 亚洲精品v日韩精品| 91免费视频大全| 亚洲柠檬福利资源导航| 色狠狠综合天天综合综合| 亚洲人被黑人高潮完整版| 97成人超碰视| 亚洲最大的成人av| 欧美日韩成人在线| 美女视频黄免费的久久| 日韩欧美不卡在线观看视频| 久久99精品国产麻豆婷婷 | 国产欧美va欧美不卡在线| 国产另类ts人妖一区二区| 精品国产露脸精彩对白 | 欧美国产在线观看| 高清不卡一区二区在线| 国产精品美女久久久久久久久久久 | 欧美三级电影网站| 日本麻豆一区二区三区视频| 日韩一区二区三区三四区视频在线观看 | 自拍偷拍欧美激情| 在线看日本不卡| 奇米综合一区二区三区精品视频| 777a∨成人精品桃花网| 激情欧美一区二区| 最新不卡av在线| 欧美精三区欧美精三区| 国产综合久久久久久久久久久久| 国产人成亚洲第一网站在线播放 | 91最新地址在线播放| 一区二区高清视频在线观看| 欧美一区二区三区在线视频| 国产激情视频一区二区三区欧美| 亚洲欧洲日韩一区二区三区| 欧美美女一区二区在线观看| 国产自产2019最新不卡| 亚洲欧美激情小说另类| 欧美一区二区高清| 成人激情动漫在线观看| 同产精品九九九| 中文字幕av一区二区三区| 欧美美女bb生活片| 粉嫩aⅴ一区二区三区四区| 亚洲第一主播视频| 国产精品视频线看| 日韩一区二区不卡| 91视频91自| 国产精品主播直播| 欧美aⅴ一区二区三区视频| 国产精品视频观看| 精品久久五月天| 欧美三级日韩三级| 91在线视频在线| 国产成人精品影视| 久久精品999| 午夜精品久久久久| 亚洲女同一区二区| 国产视频在线观看一区二区三区| 欧美高清视频一二三区| 色悠悠久久综合| 成人免费看视频| 国产麻豆91精品| 蜜桃久久久久久久| 日日夜夜精品视频免费| 亚洲综合在线电影| 国产精品久久久久久久蜜臀 | 国产成人午夜视频| 激情小说欧美图片| 美国欧美日韩国产在线播放| 天天综合色天天综合| 亚洲一级在线观看| 亚洲最大成人综合| 一区二区三区欧美日| 日韩伦理电影网| 国产精品久久久久久一区二区三区 | 91久久国产最好的精华液| 粉嫩av一区二区三区粉嫩| 国产一区二区在线观看视频| 久久精品久久99精品久久| 免费成人美女在线观看| 日本亚洲电影天堂| 免费欧美在线视频| 久久成人羞羞网站| 国产一区二区毛片| 
成人小视频免费观看| 91在线国产观看| 欧美综合天天夜夜久久| 欧美亚洲高清一区二区三区不卡| 在线观看不卡一区| 91精品国产色综合久久| 日韩精品在线一区二区| 亚洲精品一区二区精华| 国产日韩欧美电影| 亚洲视频电影在线| 午夜一区二区三区在线观看| 日韩精品一区第一页| 久久国产麻豆精品| 丰满放荡岳乱妇91ww| 色综合久久天天综合网| 欧美性猛片aaaaaaa做受| 欧美一区二区三区日韩视频| 久久影院视频免费| 中文字幕制服丝袜成人av| 亚洲一级电影视频| 狠狠色丁香久久婷婷综合丁香| 成人午夜视频免费看| 欧美亚一区二区| 久久久精品一品道一区| 亚洲色图另类专区| 青青草原综合久久大伊人精品优势| 国产一区二区h| 在线观看日韩一区| 久久亚洲精精品中文字幕早川悠里| 国产精品久久久久久福利一牛影视 | 国产清纯在线一区二区www| 亚洲裸体xxx| 久久66热偷产精品| 欧美在线播放高清精品| 久久久久亚洲综合|