// SegmentWriter.java (source listing captured from a web code viewer)
package net.nutch.segment;
import java.io.File;
import java.io.IOException;
import java.net.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.log4j.*;
import net.nutch.fs.LocalFileSystem;
import net.nutch.fs.NutchFileSystem;
import net.nutch.io.ArrayFile;
import net.nutch.parse.ParseData;
import net.nutch.parse.ParseText;
/**
 * Writes the parsed parts of a segment (ParseText and/or ParseData) into
 * {@link ArrayFile} data files located under a segment directory.
 *
 * <p>{@link #append}, {@link #setIndexInterval} and {@link #close} are
 * synchronized on this instance. The public fields are exposed as-is and are
 * not protected by that lock when accessed directly.</p>
 */
public class SegmentWriter {
    public static final Logger LOG = Logger.getLogger("segment");
    public static final String SUM_DONE_NAME = "summary.done";

    /** Writer for ParseText entries; null when ParseText output is disabled. */
    public ArrayFile.Writer parseTextWriter;
    /** Writer for ParseData entries; null when ParseData output is disabled. */
    public ArrayFile.Writer parseDataWriter;
    /** Number of completed {@link #append} calls. */
    public long size = 0L;
    /** Directory holding the segment data. */
    public File segmentDir;

    /** Empty placeholder written when append() receives a null ParseText. */
    private final ParseText _pt = new ParseText();
    /** Empty placeholder written when append() receives a null ParseData. */
    private final ParseData _pd = new ParseData();

    /**
     * Open a segment on a new {@link LocalFileSystem}, writing both ParseText
     * and ParseData.
     *
     * @param dir directory to contain the segment data
     * @param force if true, existing output directories are not treated as an error
     * @throws Exception if an output directory exists and force is false, or if
     * a writer cannot be created
     */
    public SegmentWriter(File dir, boolean force) throws Exception {
        this(new LocalFileSystem(), dir, force, true, true);
    }

    /**
     * Open a segment on the given file system, writing both ParseText and
     * ParseData.
     *
     * @param nfs NutchFileSystem to use
     * @param dir directory to contain the segment data
     * @param force if true, existing output directories are not treated as an error
     * @throws Exception if an output directory exists and force is false, or if
     * a writer cannot be created
     */
    public SegmentWriter(NutchFileSystem nfs, File dir, boolean force) throws Exception {
        this(nfs, dir, force, true, true);
    }

    /**
     * Open a segment for writing. When a segment is open, its data files are created.
     *
     * @param nfs NutchFileSystem to use
     * @param dir directory to contain the segment data; created if it does not exist
     * @param force if true, and the segment directory already exists and its content
     * is in the way, silently overwrite that content as needed.
     * If false and the above condition arises, throw an Exception. Note: this
     * doesn't result in an Exception if force=false and the target directory
     * already exists, but contains other data not conflicting with the segment
     * data.
     * @param withParseText if true, write ParseText, otherwise ignore it
     * @param withParseData if true, write ParseData, otherwise ignore it
     * @throws Exception if a requested output directory already exists and
     * force is false, or if a writer cannot be created
     */
    public SegmentWriter(NutchFileSystem nfs, File dir, boolean force,
            boolean withParseText, boolean withParseData) throws Exception {
        segmentDir = dir;
        if (!nfs.exists(segmentDir)) {
            nfs.mkdirs(segmentDir);
        }

        File textDir = withParseText ? new File(dir, ParseText.DIR_NAME) : null;
        File dataDir = withParseData ? new File(dir, ParseData.DIR_NAME) : null;

        // Validate every target before opening anything, so a conflict on the
        // second directory cannot leave the first writer open.
        requireUsable(nfs, textDir, force);
        requireUsable(nfs, dataDir, force);

        try {
            if (textDir != null) {
                parseTextWriter = new ArrayFile.Writer(nfs, textDir.toString(), ParseText.class);
            }
            if (dataDir != null) {
                parseDataWriter = new ArrayFile.Writer(nfs, dataDir.toString(), ParseData.class);
            }
        } catch (Exception e) {
            // Construction failed part-way: release whatever was already opened,
            // because the caller never receives an instance it could close().
            closeQuietly(parseTextWriter, "parseTextWriter");
            closeQuietly(parseDataWriter, "parseDataWriter");
            throw e;
        }
    }

    /** Throws if the given output directory is requested, exists, and force is false. */
    private static void requireUsable(NutchFileSystem nfs, File out, boolean force)
            throws Exception {
        if (out != null && nfs.exists(out) && !force) {
            throw new Exception("Output directory " + out + " already exists.");
        }
    }

    /** Closes a writer if it is non-null, logging (not propagating) any failure. */
    private static void closeQuietly(ArrayFile.Writer writer, String name) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (Exception e) {
            // Pass the throwable so the stack trace is not lost.
            LOG.error("Exception closing " + name + ": " + e.getMessage(), e);
        }
    }

    /**
     * Create a new segment name.
     *
     * <p>The name is the current time formatted as {@code yyyyMMddHHmmss},
     * prefixed with the local host address and a dot when a non-loopback
     * address can be determined. The method sleeps for one second before
     * returning, so two sequential calls cannot produce the same timestamp.</p>
     *
     * @return the new segment name, or null if the thread was interrupted
     * while sleeping (the interrupt status is restored in that case)
     */
    public static String getNewSegmentName() {
        String ip = "";
        boolean loopback = false;
        try {
            InetAddress localHost = InetAddress.getLocalHost();
            ip = localHost.getHostAddress();
            // Covers every loopback form (127.0.0.1, 127.0.1.1, ::1, ...),
            // none of which distinguishes one machine from another.
            loopback = localHost.isLoopbackAddress();
        } catch (Exception e) {
            LOG.warn("get local host ip error\n" + e.toString());
        }
        if (ip == null || ip.length() == 0 || loopback || ip.equals("127.0.0.1")) {
            LOG.warn("get local host ip error");
            ip = "";
        } else {
            ip += ".";
        }

        String dateStr = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
        try {
            // Second-resolution timestamps: wait out the current second so the
            // next call gets a different name.
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
            LOG.warn("interrupted while creating segment name");
            return null;
        }
        return ip + dateStr;
    }

    /**
     * Sets the index interval for all open segment writers.
     *
     * @param interval index interval passed on to each open writer
     * @throws IOException if a writer rejects the new interval
     */
    public synchronized void setIndexInterval(int interval) throws IOException {
        if (parseTextWriter != null) {
            parseTextWriter.setIndexInterval(interval);
        }
        if (parseDataWriter != null) {
            parseDataWriter.setIndexInterval(interval);
        }
    }

    /**
     * Append new values to the output segment.
     * <p>NOTE: if this segment writer has some data files open, but the respective
     * arguments are null, empty values will be written instead.</p>
     *
     * @param pt parseText, may be null (an empty ParseText is written if its writer is open)
     * @param pd parseData, may be null (an empty ParseData is written if its writer is open)
     * @throws IOException if a writer fails to append
     */
    public synchronized void append(ParseText pt, ParseData pd) throws IOException {
        if (parseTextWriter != null) {
            parseTextWriter.append(pt == null ? _pt : pt);
        }
        if (parseDataWriter != null) {
            parseDataWriter.append(pd == null ? _pd : pd);
        }
        size++;
    }

    /**
     * Close all writers. Failures are logged and do not prevent the other
     * writer from being closed. Synchronized so it cannot interleave with
     * {@link #append} or {@link #setIndexInterval}.
     */
    public synchronized void close() {
        closeQuietly(parseTextWriter, "parseTextWriter");
        closeQuietly(parseDataWriter, "parseDataWriter");
    }

    /** No-op entry point. */
    public static void main(String[] args) {}
}
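/*
 * Code viewer keyboard shortcuts (residue from the web page, not part of the program):
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Switch theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */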