// webcrawler.java
// (The code viewer's "font size:" label followed here; it is not part of the program.)
package chapter2;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;
import java.net.UnknownHostException;
/**
 * Minimal page downloader: fetches one fixed URL through an HTTP proxy, prints the
 * markup to standard output and saves the same text to a local file.
 */
public class WebCrawler {
    /** File the downloaded markup is written to. */
    private static final String TEXT_FILE_PATH = "c:\\workshop\\ch2\\htmlsrc.html";

    /** Address of the page to download. */
    private static final String TARGET_URL = "http://www.bnu.edu.cn";

    /** Host of the HTTP proxy the request is routed through. */
    private static final String PROXY_HOST = "172.17.18.84";

    /** Port of the HTTP proxy the request is routed through. */
    private static final int PROXY_PORT = 8080;

    /**
     * Downloads the page, echoes it to the console and stores it on disk.
     *
     * <p>On failure a message is printed to standard error and the JVM exits with
     * status 1. The page is downloaded completely before the output file is opened,
     * so a failed download does not truncate an existing file.
     *
     * @param args ignored
     * @throws IOException declared for backward compatibility; I/O failures are
     *     handled inside the method
     */
    public static void main(String[] args) throws IOException {
        try {
            String page = download();
            // Show the retrieved markup on the console, then persist it.
            System.out.println(page);
            save(page);
        } catch (UnknownHostException e) {
            System.err.println("無法訪問指定主機.");
            System.exit(1);
        } catch (IOException e) {
            System.err.println("下載失敗,請檢查輸入地址是否正確。");
            System.exit(1);
        }
    }

    /** Opens the target URL through the HTTP proxy and returns the markup of the response body. */
    private static String download() throws IOException {
        URL url = new URL(TARGET_URL);
        InetSocketAddress addr = new InetSocketAddress(PROXY_HOST, PROXY_PORT);
        Proxy proxy = new Proxy(Proxy.Type.HTTP, addr);
        URLConnection conn = url.openConnection(proxy);
        // NOTE: the body is decoded with the default charset, as before.
        try (InputStream in = conn.getInputStream();
                BufferedReader receiver = new BufferedReader(new InputStreamReader(in))) {
            return readFromFirstTag(receiver);
        }
    }

    /**
     * Discards everything before the first {@code '<'} and returns the rest of the stream,
     * starting with that character; returns an empty string if no {@code '<'} occurs.
     */
    private static String readFromFirstTag(BufferedReader receiver) throws IOException {
        // read() blocks until data arrives or the stream ends, so no ready() polling
        // is needed; polling spins the CPU and never finishes on an empty response.
        int ch = receiver.read();
        while (ch != -1 && ch != '<') {
            ch = receiver.read();
        }
        StringBuilder page = new StringBuilder(8096);
        while (ch != -1) {
            page.append((char) ch);
            ch = receiver.read();
        }
        return page.toString();
    }

    /** Writes the text to the output file, closing the file even when the write fails. */
    private static void save(String page) throws IOException {
        try (FileWriter fpWriter = new FileWriter(new File(TEXT_FILE_PATH))) {
            fpWriter.write(page);
        }
    }
}
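/*
 * Keyboard shortcuts of the code viewer this listing was copied from
 * (not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */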