?? arcwritertest.java
字號:
/* ARCWriterTest * * $Id: ARCWriterTest.java,v 1.37 2006/08/25 17:34:38 stack-sf Exp $ * * Created on Dec 31, 2003. * * Copyright (C) 2003 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.archive.io.arc;import java.io.ByteArrayOutputStream;import java.io.File;import java.io.FileNotFoundException;import java.io.IOException;import java.io.OutputStream;import java.io.PrintStream;import java.util.Arrays;import java.util.Date;import java.util.Iterator;import java.util.List;import java.util.concurrent.atomic.AtomicInteger;import org.archive.io.ArchiveRecord;import org.archive.io.ReplayInputStream;import org.archive.io.WriterPoolMember;import org.archive.util.ArchiveUtils;import org.archive.util.FileUtils;import org.archive.util.TmpDirTestCase;/** * Test ARCWriter class. * * This code exercises ARCWriter AND ARCReader. First it writes ARCs w/ * ARCWriter. Then it validates what was written w/ ARCReader. * * @author stack */public class ARCWriterTestextends TmpDirTestCase implements ARCConstants { /** * Prefix to use for ARC files made by JUNIT. */ private static final String PREFIX = /* TODO DEFAULT_ARC_FILE_PREFIX*/ "IAH"; private static final String SOME_URL = "http://www.archive.org/test/"; private static final AtomicInteger SERIAL_NO = new AtomicInteger(); /* * @see TestCase#setUp() */ protected void setUp() throws Exception { super.setUp(); } /* * @see TestCase#tearDown() */ protected void tearDown() throws Exception { super.tearDown(); } protected static String getContent() { return getContent(null); } protected static String getContent(String indexStr) { String page = (indexStr != null)? "Page #" + indexStr: "Some Page"; return "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html\r\n\r\n" + "<html><head><title>" + page + "</title></head>" + "<body>" + page + "</body></html>"; } protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index) throws IOException { String indexStr = Integer.toString(index); ByteArrayOutputStream baos = new ByteArrayOutputStream(); // Start the record with an arbitrary 14-digit date per RFC2540 String now = ArchiveUtils.get14DigitDate(); int recordLength = 0; byte[] record = (getContent(indexStr)).getBytes(); recordLength += record.length; baos.write(record); // Add the newline between records back in baos.write("\n".getBytes()); recordLength += 1; arcWriter.write("http://www.one.net/id=" + indexStr, "text/html", "0.1.2.3", Long.parseLong(now), recordLength, baos); return recordLength; } private File writeRecords(String baseName, boolean compress, int maxSize, int recordCount) throws IOException { cleanUpOldFiles(baseName); File [] files = {getTmpDir()}; ARCWriter arcWriter = new ARCWriter(SERIAL_NO, Arrays.asList(files), baseName + '-' + PREFIX, compress, maxSize); assertNotNull(arcWriter); for (int i = 0; i < recordCount; i++) { writeRandomHTTPRecord(arcWriter, i); } arcWriter.close(); assertTrue("Doesn't exist: " + arcWriter.getFile().getAbsolutePath(), arcWriter.getFile().exists()); return arcWriter.getFile(); } private void validate(File arcFile, int recordCount) throws FileNotFoundException, IOException { ARCReader reader = ARCReaderFactory.get(arcFile); assertNotNull(reader); List metaDatas = null; if (recordCount == -1) { metaDatas = reader.validate(); } else { metaDatas = reader.validate(recordCount); } reader.close(); // Now, run through each of the records doing absolute get going from // the end to start. Reopen the arc so no context between this test // and the previous. reader = ARCReaderFactory.get(arcFile); for (int i = metaDatas.size() - 1; i >= 0; i--) { ARCRecordMetaData meta = (ARCRecordMetaData)metaDatas.get(i); ArchiveRecord r = reader.get(meta.getOffset()); String mimeType = r.getHeader().getMimetype(); assertTrue("Record is bogus", mimeType != null && mimeType.length() > 0); } reader.close(); assertTrue("Metadatas not equal", metaDatas.size() == recordCount); for (Iterator i = metaDatas.iterator(); i.hasNext();) { ARCRecordMetaData r = (ARCRecordMetaData)i.next(); assertTrue("Record is empty", r.getLength() > 0); } } public void testCheckARCFileSize() throws IOException { runCheckARCFileSizeTest("checkARCFileSize", false); } public void testCheckARCFileSizeCompressed() throws IOException { runCheckARCFileSizeTest("checkARCFileSize", true); } public void testWriteRecord() throws IOException { final int recordCount = 2; File arcFile = writeRecords("writeRecord", false, DEFAULT_MAX_ARC_FILE_SIZE, recordCount); validate(arcFile, recordCount + 1); // Header record. } public void testRandomAccess() throws IOException { final int recordCount = 3; File arcFile = writeRecords("writeRecord", true, DEFAULT_MAX_ARC_FILE_SIZE, recordCount); ARCReader reader = ARCReaderFactory.get(arcFile); // Get to second record. Get its offset for later use. boolean readFirst = false; String url = null; long offset = -1; long totalRecords = 0; boolean readSecond = false; for (final Iterator i = reader.iterator(); i.hasNext(); totalRecords++) { ARCRecord ar = (ARCRecord)i.next(); if (!readFirst) { readFirst = true; continue; } if (!readSecond) { url = ar.getMetaData().getUrl(); offset = ar.getMetaData().getOffset(); readSecond = true; } } reader = ARCReaderFactory.get(arcFile, offset); ArchiveRecord ar = reader.get(); assertEquals(ar.getHeader().getUrl(), url); ar.close(); // Get reader again. See how iterator works with offset reader = ARCReaderFactory.get(arcFile, offset); int count = 0; for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) { count++; } reader.close(); assertEquals(totalRecords - 1, count); } public void testWriteRecordCompressed() throws IOException { final int recordCount = 2; File arcFile = writeRecords("writeRecordCompressed", true, DEFAULT_MAX_ARC_FILE_SIZE, recordCount); validate(arcFile, recordCount + 1 /*Header record*/); } private void runCheckARCFileSizeTest(String baseName, boolean compress) throws FileNotFoundException, IOException { writeRecords(baseName, compress, 1024, 15); // Now validate all files just created. File [] files = FileUtils.getFilesWithPrefix(getTmpDir(), PREFIX); for (int i = 0; i < files.length; i++) { validate(files[i], -1); } } protected ARCWriter createARCWriter(String NAME, boolean compress) { File [] files = {getTmpDir()}; return new ARCWriter(SERIAL_NO, Arrays.asList(files), NAME, compress, DEFAULT_MAX_ARC_FILE_SIZE); } protected static ByteArrayOutputStream getBaos(String str) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos.write(str.getBytes()); return baos; } protected static void writeRecord(ARCWriter writer, String url, String type, int len, ByteArrayOutputStream baos) throws IOException { writer.write(url, type, "192.168.1.1", (new Date()).getTime(), len, baos); } protected int iterateRecords(ARCReader r) throws IOException { int count = 0; for (Iterator i = r.iterator(); i.hasNext();) { ARCRecord rec = (ARCRecord)i.next(); rec.close(); if (count != 0) { assertTrue("Unexpected URL " + rec.getMetaData().getUrl(),
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -