?? experimentalwarcwriter.java
字號:
// Insert length and pad out to fixed width with zero prefix to // highlight 'fixed-widthness' of length. int start = WARC_ID.length() + 1 /*HEADER_FIELD_SEPARATOR */; int end = start + PLACEHOLDER_RECORD_LENGTH_STRING.length(); String lenStr = RECORD_LENGTH_FORMATTER.format(length); sb.replace(start, end, lenStr); return sb.toString().getBytes(HEADER_LINE_ENCODING); } protected void writeRecord(final String type, final String url, final String create14DigitDate, final String mimetype, final URI recordId, ANVLRecord namedFields, final InputStream contentStream, final long contentLength) throws IOException { if (!TYPES_LIST.contains(type)) { throw new IllegalArgumentException("Unknown record type: " + type); } if (contentLength == 0 && (namedFields == null || namedFields.size() <= 0)) { throw new IllegalArgumentException("Cannot have a record made " + "of a Header line only (Content and Named Fields are empty)."); } preWriteRecordTasks(); try { if (namedFields == null) { // Use the empty anvl record so the length of blank line on // end gets counted as part of the record length. namedFields = ANVLRecord.EMPTY_ANVL_RECORD; } // Serialize metadata first so we have metadata length. final byte [] namedFieldsBlock = namedFields.getUTF8Bytes(); // Now serialize the Header line. final byte [] header = createRecordHeaderline(type, url, create14DigitDate, mimetype, recordId, namedFieldsBlock.length, contentLength); write(header); write(namedFieldsBlock); if (contentStream != null && contentLength > 0) { readFullyFrom(contentStream, contentLength, this.readbuffer); } // Write out the two blank lines at end of all records. // TODO: Why? Messes up skipping through file. Also not in grammar. write(CRLF_BYTES); write(CRLF_BYTES); } finally { postWriteRecordTasks(); } } protected URI generateRecordId(final Map<String, String> qualifiers) throws IOException { URI rid = null; try { rid = GeneratorFactory.getFactory(). getQualifiedRecordID(qualifiers); } catch (URISyntaxException e) { // Convert to IOE so can let it out. throw new IOException(e.getMessage()); } return rid; } protected URI generateRecordId(final String key, final String value) throws IOException { URI rid = null; try { rid = GeneratorFactory.getFactory(). getQualifiedRecordID(key, value); } catch (URISyntaxException e) { // Convert to IOE so can let it out. throw new IOException(e.getMessage()); } return rid; } public URI writeWarcinfoRecord(String filename) throws IOException { return writeWarcinfoRecord(filename, null); } public URI writeWarcinfoRecord(String filename, final String description) throws IOException { // Strip .open suffix if present. if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) { filename = filename.substring(0, filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length()); } ANVLRecord record = new ANVLRecord(2); record.addLabelValue(NAMED_FIELD_WARCFILENAME, filename); if (description != null && description.length() > 0) { record.addLabelValue(NAMED_FIELD_DESCRIPTION, description); } // Add warcinfo body. byte [] warcinfoBody = null; if (this.fileMetadata == null) { // TODO: What to write into a warcinfo? What to associate? warcinfoBody = "TODO: Unimplemented".getBytes(); } else { ByteArrayOutputStream baos = new ByteArrayOutputStream(); for (final Iterator i = this.fileMetadata.iterator(); i.hasNext();) { baos.write(i.next().toString().getBytes(UTF8Bytes.UTF8)); } warcinfoBody = baos.toByteArray(); } URI uri = writeWarcinfoRecord("text/plain", record, new ByteArrayInputStream(warcinfoBody), warcinfoBody.length); // TODO: If at start of file, and we're writing compressed, // write out our distinctive GZIP extensions. return uri; } /** * Write a warcinfo to current file. * TODO: Write crawl metadata or pointers to crawl description. * @param mimetype Mimetype of the <code>fileMetadata</code> block. * @param namedFields Named fields. Pass <code>null</code> if none. * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc. * @param fileMetadataLength Length of <code>fileMetadata</code>. * @throws IOException * @return Generated record-id made with * <a href="http://en.wikipedia.org/wiki/Data:_URL">data: scheme</a> and * the current filename. */ public URI writeWarcinfoRecord(final String mimetype, final ANVLRecord namedFields, final InputStream fileMetadata, final long fileMetadataLength) throws IOException { final URI recordid = generateRecordId(TYPE, WARCINFO); writeWarcinfoRecord(ArchiveUtils.get14DigitDate(), mimetype, recordid, namedFields, fileMetadata, fileMetadataLength); return recordid; } /** * Write a <code>warcinfo</code> to current file. * The <code>warcinfo</code> type uses its <code>recordId</code> as its URL. * @param recordId URI to use for this warcinfo. * @param create14DigitDate Record creation date as 14 digit date. * @param mimetype Mimetype of the <code>fileMetadata</code>. * @param namedFields Named fields. * @param fileMetadata Metadata about this WARC as RDF, ANVL, etc. * @param fileMetadataLength Length of <code>fileMetadata</code>. * @throws IOException */ public void writeWarcinfoRecord(final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream fileMetadata, final long fileMetadataLength) throws IOException { writeRecord(WARCINFO, recordId.toString(), create14DigitDate, mimetype, recordId, namedFields, fileMetadata, fileMetadataLength); } public void writeRequestRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream request, final long requestLength) throws IOException { writeRecord(REQUEST, url, create14DigitDate, mimetype, recordId, namedFields, request, requestLength); } public void writeResourceRecord(final String url, final String create14DigitDate, final String mimetype, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeResourceRecord(url, create14DigitDate, mimetype, getRecordID(), namedFields, response, responseLength); } public void writeResourceRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(RESOURCE, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength); } public void writeResponseRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream response, final long responseLength) throws IOException { writeRecord(RESPONSE, url, create14DigitDate, mimetype, recordId, namedFields, response, responseLength); } public void writeMetadataRecord(final String url, final String create14DigitDate, final String mimetype, final URI recordId, final ANVLRecord namedFields, final InputStream metadata, final long metadataLength) throws IOException { writeRecord(METADATA, url, create14DigitDate, mimetype, recordId, namedFields, metadata, metadataLength); } /** * Convenience method for getting Record-Ids. * @return A record ID. * @throws IOException */ public static URI getRecordID() throws IOException { URI result; try { result = GeneratorFactory.getFactory().getRecordID(); } catch (URISyntaxException e) { throw new IOException(e.toString()); } return result; }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -