?? arcwritertest.java
字號:
/* ARCWriterTest * * $Id: ARCWriterTest.java 5478 2007-09-19 01:37:07Z gojomo $ * * Created on Dec 31, 2003. * * Copyright (C) 2003 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.archive.io.arc;import java.io.ByteArrayInputStream;import java.io.ByteArrayOutputStream;import java.io.File;import java.io.FileNotFoundException;import java.io.IOException;import java.io.OutputStream;import java.io.PrintStream;import java.util.Arrays;import java.util.Date;import java.util.Iterator;import java.util.List;import java.util.concurrent.atomic.AtomicInteger;import org.apache.commons.io.IOUtils;import org.apache.commons.io.input.NullInputStream;import org.apache.commons.io.output.NullOutputStream;import org.archive.io.ArchiveRecord;import org.archive.io.ReplayInputStream;import org.archive.io.WriterPoolMember;import org.archive.util.ArchiveUtils;import org.archive.util.FileUtils;import org.archive.util.TmpDirTestCase;/** * Test ARCWriter class. * * This code exercises ARCWriter AND ARCReader. First it writes ARCs w/ * ARCWriter. Then it validates what was written w/ ARCReader. 
* * @author stack */
public class ARCWriterTest extends TmpDirTestCase implements ARCConstants {

    /**
     * Utility class for writing bad ARCs (with trailing junk).
     *
     * When junk bytes have been supplied through {@link #setEndJunk(byte[])},
     * {@link #postWriteRecordTasks()} writes them before delegating to the
     * superclass implementation.
     */
    public class CorruptibleARCWriter extends ARCWriter {
        /** Junk bytes to emit from postWriteRecordTasks(); null means none. */
        byte[] endJunk = null;

        /**
         * Passes every argument straight through to the matching
         * {@link ARCWriter} constructor.
         */
        public CorruptibleARCWriter(AtomicInteger serial_no, List<File> name,
                String name2, boolean compress,
                long default_max_arc_file_size) {
            super(serial_no, name, name2, compress, default_max_arc_file_size);
        }

        /**
         * Writes the configured junk (if any), then runs the superclass tasks.
         *
         * @throws IOException if writing the junk or the superclass call fails
         */
        @Override
        protected void postWriteRecordTasks() throws IOException {
            if (endJunk != null) {
                this.write(endJunk);
            }
            super.postWriteRecordTasks();
        }

        /**
         * Sets the junk bytes to write from postWriteRecordTasks().
         *
         * @param b junk bytes, or null to stop writing junk
         * @throws IOException never thrown by this implementation; kept in
         *         the signature for compatibility with existing callers
         */
        public void setEndJunk(byte[] b) throws IOException {
            this.endJunk = b;
        }
    }

    /**
     * Marker used in the names of ARC files made by JUNIT. Despite the
     * TODO below mentioning a prefix, writeRecords() appends this value
     * after the base name (baseName + '-' + SUFFIX).
     */
    private static final String SUFFIX =
        /* TODO DEFAULT_ARC_FILE_PREFIX*/
        "JUNIT";

    /** URL constant; not referenced by the methods visible in this part of the file. */
    private static final String SOME_URL = "http://www.archive.org/test/";

    /** Serial number source shared by every writer this test creates. */
    private static final AtomicInteger SERIAL_NO = new AtomicInteger();

    /*
     * @see TestCase#setUp()
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
    }

    /*
     * @see TestCase#tearDown()
     */
    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }

    /**
     * @return a canned HTTP response whose page title and body are
     *         "Some Page"
     */
    protected static String getContent() {
        return getContent(null);
    }

    /**
     * Builds a canned HTTP response: status line, Content-Type header, blank
     * line and a small HTML document.
     *
     * @param indexStr label to embed as "Page #&lt;indexStr&gt;"; when null
     *        the page is labelled "Some Page"
     * @return the response text
     */
    protected static String getContent(String indexStr) {
        String page = (indexStr != null) ? "Page #" + indexStr : "Some Page";
        return "HTTP/1.1 200 OK\r\n"
            + "Content-Type: text/html\r\n\r\n"
            + "<html><head><title>" + page + "</title></head>"
            + "<body>" + page + "</body></html>";
    }

    /**
     * Writes one canned HTTP record (see {@link #getContent(String)}) followed
     * by a single newline to the given writer.
     *
     * @param arcWriter writer to append the record to
     * @param index number embedded in the record's URL and page content
     * @return number of bytes handed to the writer, including the newline
     * @throws IOException if the write fails
     */
    protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index)
    throws IOException {
        String indexStr = Integer.toString(index);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Start the record with an arbitrary 14-digit date per RFC2540
        String now = ArchiveUtils.get14DigitDate();
        int recordLength = 0;
        // Name the charset explicitly so the byte count does not depend on
        // the platform default encoding.
        byte[] record = getContent(indexStr).getBytes("UTF-8");
        recordLength += record.length;
        baos.write(record);
        // Add the newline between records back in
        baos.write("\n".getBytes("UTF-8"));
        recordLength += 1;
        arcWriter.write("http://www.one.net/id=" + indexStr, "text/html",
            "0.1.2.3", Long.parseLong(now), recordLength, baos);
        return recordLength;
    }

    /**
     * Writes {@code recordCount} canned records into the temporary directory
     * and returns the writer's file once the writer has been closed.
     *
     * @param baseName base name for the ARC; old files for it are cleaned up
     *        first via cleanUpOldFiles()
     * @param compress compression flag passed to the ARCWriter constructor
     * @param maxSize maximum file size passed to the ARCWriter constructor
     * @param recordCount how many records to write
     * @return the file reported by the writer after close
     * @throws IOException if writing fails
     */
    private File writeRecords(String baseName, boolean compress,
            long maxSize, int recordCount)
    throws IOException {
        cleanUpOldFiles(baseName);
        File [] files = {getTmpDir()};
        ARCWriter arcWriter = new ARCWriter(SERIAL_NO, Arrays.asList(files),
            baseName + '-' + SUFFIX, compress, maxSize);
        assertNotNull(arcWriter);
        for (int i = 0; i < recordCount; i++) {
            writeRandomHTTPRecord(arcWriter, i);
        }
        arcWriter.close();
        assertTrue("Doesn't exist: "
            + arcWriter.getFile().getAbsolutePath(),
            arcWriter.getFile().exists());
        return arcWriter.getFile();
    }

    /**
     * Validates an ARC file with ARCReader and then re-reads every record by
     * absolute offset.
     *
     * @param arcFile file to validate
     * @param recordCount expected number of records, or -1 to validate
     *        without a count expectation
     * @throws FileNotFoundException declared for callers; raised by the
     *         reader factory when applicable
     * @throws IOException if reading fails
     */
    private void validate(File arcFile, int recordCount)
    throws FileNotFoundException, IOException {
        ARCReader reader = ARCReaderFactory.get(arcFile);
        assertNotNull(reader);
        List metaDatas = null;
        try {
            if (recordCount == -1) {
                metaDatas = reader.validate();
            } else {
                metaDatas = reader.validate(recordCount);
            }
        } finally {
            reader.close();
        }
        // Now, run through each of the records doing absolute get going from
        // the end to start.  Reopen the arc so no context between this test
        // and the previous.
        reader = ARCReaderFactory.get(arcFile);
        try {
            for (int i = metaDatas.size() - 1; i >= 0; i--) {
                ARCRecordMetaData meta = (ARCRecordMetaData)metaDatas.get(i);
                ArchiveRecord r = reader.get(meta.getOffset());
                String mimeType = r.getHeader().getMimetype();
                assertTrue("Record is bogus",
                    mimeType != null && mimeType.length() > 0);
            }
        } finally {
            reader.close();
        }
        // -1 is the "no expectation" sentinel; comparing the size against it
        // could never succeed, so only check when a real count was given.
        if (recordCount != -1) {
            assertEquals("Metadatas not equal", recordCount, metaDatas.size());
        }
        for (Iterator i = metaDatas.iterator(); i.hasNext();) {
            ARCRecordMetaData r = (ARCRecordMetaData)i.next();
            assertTrue("Record is empty", r.getLength() > 0);
        }
    }

    /** Runs the file-size check against uncompressed ARCs. */
    public void testCheckARCFileSize()
    throws IOException {
        runCheckARCFileSizeTest("checkARCFileSize", false);
    }

    /** Runs the file-size check against compressed ARCs. */
    public void testCheckARCFileSizeCompressed()
    throws IOException {
        runCheckARCFileSizeTest("checkARCFileSize", true);
    }

    /** Writes two uncompressed records and validates the resulting ARC. */
    public void testWriteRecord() throws IOException {
        final int recordCount = 2;
        File arcFile = writeRecords("writeRecord", false,
            DEFAULT_MAX_ARC_FILE_SIZE, recordCount);
        validate(arcFile, recordCount + 1); // Header record.
    }

    /**
     * Writes a compressed ARC, remembers the offset of the second record met
     * while iterating, then checks that a reader opened at that offset
     * returns the same URL and iterates over one record fewer than the full
     * iteration did.
     */
    public void testRandomAccess() throws IOException {
        final int recordCount = 3;
        File arcFile = writeRecords("writeRecord", true,
            DEFAULT_MAX_ARC_FILE_SIZE, recordCount);
        ARCReader reader = ARCReaderFactory.get(arcFile);
        // Get to second record.  Get its offset for later use.
        boolean readFirst = false;
        String url = null;
        long offset = -1;
        long totalRecords = 0;
        boolean readSecond = false;
        try {
            for (final Iterator i = reader.iterator(); i.hasNext();
                    totalRecords++) {
                ARCRecord ar = (ARCRecord)i.next();
                if (!readFirst) {
                    readFirst = true;
                    continue;
                }
                if (!readSecond) {
                    url = ar.getMetaData().getUrl();
                    offset = ar.getMetaData().getOffset();
                    readSecond = true;
                }
            }
        } finally {
            reader.close();
        }

        reader = ARCReaderFactory.get(arcFile, offset);
        try {
            ArchiveRecord ar = reader.get();
            assertEquals(url, ar.getHeader().getUrl());
            ar.close();
        } finally {
            reader.close();
        }

        // Get reader again.  See how iterator works with offset
        reader = ARCReaderFactory.get(arcFile, offset);
        int count = 0;
        try {
            for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) {
                count++;
            }
        } finally {
            reader.close();
        }
        assertEquals(totalRecords - 1, count);
    }

    /** Writes two compressed records and validates the resulting ARC. */
    public void testWriteRecordCompressed() throws IOException {
        final int recordCount = 2;
        File arcFile = writeRecords("writeRecordCompressed", true,
            DEFAULT_MAX_ARC_FILE_SIZE, recordCount);
        validate(arcFile, recordCount + 1 /*Header record*/);
    }

    /**
     * Pushes a 3 GB record of null input through a writer whose output is
     * discarded, so nothing is stored on disk.
     */
    public void testWriteGiantRecord() throws IOException {
        PrintStream dummyStream = new PrintStream(new NullOutputStream());
        ARCWriter arcWriter = new ARCWriter(SERIAL_NO, dummyStream,
            new File("dummy"), false, null, null);
        assertNotNull(arcWriter);

        // The timestamp argument here is the current time in milliseconds,
        // not a 14-digit date string as in writeRandomHTTPRecord().
        long now = System.currentTimeMillis();
        long recordLength = org.apache.commons.io.FileUtils.ONE_GB * 3;

        arcWriter.write("dummy:uri", "application/octet-stream",
            "0.1.2.3", now, recordLength, new NullInputStream(recordLength));
        arcWriter.close();
    }

    /**
     * Writes 15 records with a 1024 maximum file size, then validates the
     * files found in the temporary directory.
     *
     * @param baseName base name for the ARCs
     * @param compress whether to write compressed ARCs
     */
    private void runCheckARCFileSizeTest(String baseName, boolean compress)
    throws FileNotFoundException, IOException {
        writeRecords(baseName, compress, 1024, 15);
        // Now validate all files just created.
        // NOTE(review): files are named baseName + '-' + SUFFIX, yet the
        // lookup below uses SUFFIX alone as the prefix -- confirm that
        // getFilesWithPrefix() really matches the files written above.
        File [] files = FileUtils.getFilesWithPrefix(getTmpDir(), SUFFIX);
        for (int i = 0; i < files.length; i++) {
            validate(files[i], -1);
        }
    }

    /**
     * Creates a {@link CorruptibleARCWriter} that writes into the temporary
     * directory with the default maximum ARC file size.
     *
     * @param NAME name passed to the writer constructor
     * @param compress compression flag passed to the writer constructor
     * @return the new writer
     */
    protected CorruptibleARCWriter createARCWriter(String NAME,
            boolean compress) {
        File [] files = {getTmpDir()};
        return new CorruptibleARCWriter(SERIAL_NO, Arrays.asList(files),
            NAME, compress, DEFAULT_MAX_ARC_FILE_SIZE);
    }
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -