?? html2xml.java
字號:
import java.net.URL;
import java.io.*;
import org.w3c.tidy.Configuration;
import org.w3c.tidy.Tidy;
/**
 * Fetches an HTML document from a URL and writes it to a file as XML using JTidy.
 *
 * <p>Messages that Tidy emits while parsing are written to a separate error file.
 */
public class HTML2XML {
    /** Address of the HTML document to fetch. */
    private final String url;
    /** Path of the file that receives the converted output. */
    private final String outFileName;
    /** Path of the file that receives Tidy's error messages. */
    private final String errOutFileName;

    /**
     * Creates a converter for one document.
     *
     * @param url            address of the HTML document to fetch
     * @param outFileName    path of the file to write the converted output to
     * @param errOutFileName path of the file to write Tidy's error messages to
     */
    public HTML2XML(String url, String outFileName, String errOutFileName) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
    }

    /**
     * Opens the URL, runs the content through Tidy and writes the result to the output file.
     *
     * <p>Any {@link IOException} (malformed URL, unreachable host, unwritable file, ...) is
     * reported on standard error and not rethrown. All streams, including the error-log
     * writer, are closed whether or not the conversion succeeds.
     */
    public void convert() {
        Tidy tidy = new Tidy();
        // Tell Tidy to convert HTML to XML.
        tidy.setXmlOut(true);
        // Optional Tidy switches, left disabled:
        //   tidy.setDropFontTags(true);    // drop font tags
        //   tidy.setDropEmptyParas(true);  // drop empty paragraphs
        //   tidy.setFixComments(true);     // repair comments
        //   tidy.setFixBackslash(true);    // repair backslashes
        //   tidy.setMakeClean(true);       // strip cluttered presentational markup
        //   tidy.setQuoteNbsp(false);      // how non-breaking spaces are written
        //   tidy.setQuoteMarks(false);     // how double quotes are written
        //   tidy.setQuoteAmpersand(true);  // how '&' is written
        tidy.setCharEncoding(Configuration.RAW);
        tidy.setXmlPi(true);

        // Resources are opened in the same order as before (error log, input, output);
        // try-with-resources closes them in reverse order even when an exception is thrown.
        try (PrintWriter errWriter = new PrintWriter(new FileWriter(errOutFileName), true);
             InputStream in = new BufferedInputStream(new URL(url).openStream());
             OutputStream out = new FileOutputStream(outFileName)) {
            // Send Tidy's messages to the error file.
            tidy.setErrout(errWriter);
            // Convert the document.
            tidy.parse(in, out);
        } catch (IOException e) {
            System.err.println("HTML2XML: failed to convert " + url + ": " + e);
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>When exactly three arguments are supplied they are used as:
     * <ol>
     *   <li>URL of the HTML file</li>
     *   <li>filename of the output file</li>
     *   <li>filename of the error file</li>
     * </ol>
     * With any other argument count the built-in sample values are used.
     *
     * @param args optional URL, output filename and error filename
     */
    public static void main(String[] args) {
        HTML2XML converter;
        if (args != null && args.length == 3) {
            converter = new HTML2XML(args[0], args[1], args[2]);
        } else {
            converter = new HTML2XML("http://www.imdb.com/title/tt0068646/", "wanfang.xml", "wanfang.txt");
        }
        converter.convert();
    }
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -