// gather.java
// (Header residue from the web code viewer this file was copied from: "font size" control.)
package gather;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.net.UnknownHostException;
import java.util.Date;
import java.util.Locale;
/**
 * Worker thread that fetches one URL.
 *
 * <p>{@link #run()} normalises {@link #strurl}, opens a connection to it and, when the
 * server answers HTTP 200 (or with an "OK" reason phrase), hands the open connection to
 * {@code Writer.writeData}. Every failure is reported as a timestamped line on standard
 * output; nothing is thrown to the caller of {@code run()}.
 */
public class gather extends Thread
{
    // -------------------------------------------------------
    // Field definitions.
    static int nRunCount = 0;
    int nID = 0;
    /** URL to fetch; rewritten in normalised form by {@link #run()}. */
    String strurl;
    URL url;
    URLConnection urlconnection;
    InputStream inputstream;
    BufferedReader bufreader;
    /** Passed through unchanged to {@code Writer.writeData}. */
    boolean bSaveToFile;
    /** Time of the most recent connection attempt; used to stamp error messages. */
    private Date date;
    /** Set by {@link #exitThread()}; not read anywhere in this class. */
    private boolean exit;
    // -------------------------------------------------------
    /** Chinese explanation for each HTTP status code, indexed by the code itself. */
    private String[] strCNMsgOfResponse = new String[1024];
    /** Upper bound for the network timeout, in milliseconds. */
    private final int nMaxTimeOfTimeout = 15 * 60000;
    /** Current network timeout, in milliseconds. */
    private int nTimeOfTimeout = 60000;
    /** Number of timeouts seen since the timeout was last adjusted. */
    private int nCountOfTimeout = 0;

    // -------------------------------------------------------
    /** Creates a worker with no URL; {@link #strurl} must be assigned before starting it. */
    public gather()
    {
        initgather();
    }

    /**
     * Creates a worker for the given URL.
     *
     * @param strInUrl URL to fetch
     * @param bIn      value stored in {@link #bSaveToFile}
     */
    public gather(String strInUrl, boolean bIn)
    {
        this.strurl = strInUrl;
        this.bSaveToFile = bIn;
        initgather();
    }

    /**
     * Resets the timestamp and rebuilds the status-code message table.
     * Codes without an explanation map to the empty string.
     */
    public void initgather()
    {
        date = new Date();
        // Every slot starts empty so a lookup never yields null.
        for (int i = 0; i < strCNMsgOfResponse.length; i++)
        {
            strCNMsgOfResponse[i] = "";
        }
        // Only these codes carry an explanation; all others stay "".
        strCNMsgOfResponse[202] = "已經(jīng)接受請求,但處理尚未完成";
        strCNMsgOfResponse[203] = "文檔已經(jīng)正常地返回,但一些應(yīng)答頭可能不正確,因為使用的是文檔的拷貝";
        strCNMsgOfResponse[204] = "沒有新文檔,瀏覽器應(yīng)該繼續(xù)顯示原來的文檔";
        strCNMsgOfResponse[404] = "無法找到指定位置的資源,該頁面不存在";
        strCNMsgOfResponse[999] = "服務(wù)器返回:999。不明錯誤。";
    }

    /**
     * Requests that the thread end by setting the internal exit flag.
     */
    public void exitThread()
    {
        exit = true;
    }

    /**
     * Tells whether an I/O failure has a dedicated handler (and message) in
     * {@link #establishConnection()}.
     */
    private static boolean hasDedicatedHandler(IOException ex)
    {
        return ex instanceof ConnectException
                || ex instanceof UnknownHostException
                || ex instanceof SocketTimeoutException
                || ex instanceof FileNotFoundException;
    }

    /**
     * Opens a connection to {@link #strurl}, reads the HTTP status and either passes the
     * connection to {@code Writer.writeData} or prints a timestamped error line.
     *
     * <p>All exceptions raised inside are caught and reported here.
     *
     * @throws Exception declared for compatibility; not expected to propagate
     */
    @SuppressWarnings("deprecation") // Date.toLocaleString() kept so the log format is unchanged
    private void establishConnection() throws Exception
    {
        int nHttpResponseCode = -1;
        String strHttpResponse = "";
        String strResponseMsg = "";
        String strErrorMsg = "";
        // Stamp errors with the time of this attempt, not the time the object was built.
        date = new Date();
        String strDateString = "[" + date.toLocaleString() + "] ";
        try
        {
            url = new URL(this.strurl);
            // -------------------------------------------------------
            // Open the connection (no proxy) and bound both connect and read time.
            urlconnection = url.openConnection();
            urlconnection.setConnectTimeout(nTimeOfTimeout);
            urlconnection.setReadTimeout(nTimeOfTimeout);
            // -------------------------------------------------------
            // After connecting: fetch the status code and reason phrase.
            if (urlconnection instanceof HttpURLConnection)
            {
                try
                {
                    HttpURLConnection httpurlconn = (HttpURLConnection) urlconnection;
                    nHttpResponseCode = httpurlconn.getResponseCode();
                    strHttpResponse = httpurlconn.getResponseMessage();
                    if (nHttpResponseCode >= 1024)
                    {
                        strErrorMsg = "服務(wù)器返回值大于1024.[" + this.strurl + "]";
                    }
                    else if (nHttpResponseCode < 0)
                    {
                        strErrorMsg = "服務(wù)器返回值小于0.[" + this.strurl + "]";
                    }
                    else
                    {
                        strResponseMsg = "服務(wù)器返回:[" + nHttpResponseCode + "," + strHttpResponse + "] " + strCNMsgOfResponse[nHttpResponseCode];
                    }
                    if (nHttpResponseCode > 600) // diagnostic output for unusual codes
                    {
                        System.out.println(strResponseMsg + "URL:" + this.strurl);
                    }
                }
                catch (IOException ex)
                {
                    // getResponseCode() is what actually connects, so connect/timeout/
                    // unknown-host failures surface here. Pass them on to the dedicated
                    // handlers below instead of flattening them into a generic message.
                    if (hasDedicatedHandler(ex))
                    {
                        throw ex;
                    }
                    strErrorMsg = " 讀取服務(wù)器返回值時發(fā)生IO錯誤,需重新連接。[" + ex + "]";
                }
                catch (Exception ex)
                {
                    strErrorMsg = " 讀取服務(wù)器返回值時發(fā)生不明錯誤,---->[" + ex + "]";
                }
            }
            // -------------------------------------------------------
            // On success, let Writer.writeData record the page data.
            // The reason phrase may be null, hence the constant-first comparison.
            if ((nHttpResponseCode == 200) || "ok".equalsIgnoreCase(strHttpResponse))
            {
                new Writer().writeData(urlconnection, this.strurl, this.bSaveToFile);
            }
            else
            {
                // Report the failed response, but keep any more specific error already recorded.
                if (strErrorMsg.isEmpty())
                {
                    if (strResponseMsg.isEmpty())
                    {
                        strResponseMsg = "連接失敗! 程序運(yùn)行正常,請檢查網(wǎng)絡(luò)或者代理或者所輸入的URL";
                    }
                    strErrorMsg = strResponseMsg;
                }
                // The connection is not handed to Writer on this path, so release it here.
                if (urlconnection instanceof HttpURLConnection)
                {
                    ((HttpURLConnection) urlconnection).disconnect();
                }
            }
        }
        catch (MalformedURLException e_url)
        {
            strErrorMsg = " 輸入的URL不正確!";
        }
        catch (ConnectException e_con)
        {
            // Connection refused / unreachable.
            strErrorMsg = " 連接錯誤:不能連接到服務(wù)器!請檢查網(wǎng)絡(luò)連接或代理設(shè)置";
        }
        catch (UnknownHostException e_con)
        {
            // Host name could not be resolved.
            strErrorMsg = " 錯誤的服務(wù)器或不明主機(jī)! 請檢查URL是否正確";
        }
        catch (SocketTimeoutException e_con)
        {
            // Timed out.
            strErrorMsg = " 連接超時!請檢查網(wǎng)絡(luò)連接或代理設(shè)置!";
            nCountOfTimeout++;
            // After 100 timeouts, double the timeout, never exceeding the configured maximum.
            if (nCountOfTimeout >= 100)
            {
                nCountOfTimeout = 0;
                if (nTimeOfTimeout < nMaxTimeOfTimeout)
                {
                    nTimeOfTimeout = Math.min(nTimeOfTimeout * 2, nMaxTimeOfTimeout);
                    System.out.println("已調(diào)整網(wǎng)絡(luò)鏈接超時時間為:" + nTimeOfTimeout);
                }
            }
        }
        catch (FileNotFoundException e_url)
        {
            strErrorMsg = " 該頁面不存在! ";
        }
        catch (Exception e_url)
        {
            strErrorMsg = " !不明錯誤![" + e_url + "]";
            e_url.printStackTrace();
        }
        if (!strErrorMsg.isEmpty())
        {
            // An error occurred: report it together with the time of the attempt.
            strErrorMsg = strDateString + strErrorMsg;
            System.out.println("URL:" + this.strurl + strErrorMsg);
        }
    }

    /**
     * Thread body: normalises {@link #strurl} and fetches it.
     *
     * <p>Returns immediately when no URL is set. Otherwise {@code controler.nThreadCount}
     * is incremented on entry and decremented when the work ends, whether it succeeded
     * or not.
     */
    @Override
    public void run()
    {
        if (this.strurl == null || this.strurl.isEmpty())
        {
            return;
        }
        // NOTE(review): plain ++/-- on a shared static is not atomic if several gather
        // threads run at once — confirm how controler guards this counter.
        controler.nThreadCount++;
        try
        {
            // Locale.ROOT keeps lowercasing independent of the platform locale.
            // NOTE(review): lowercasing the whole URL also lowercases the path and query,
            // which may matter for case-sensitive servers; kept because other code may
            // rely on the lowercased form — confirm.
            String strNormalized = this.strurl.toLowerCase(Locale.ROOT);
            // Convert backslashes first so "http:\\host" is recognised as already having a scheme.
            strNormalized = strNormalized.replace('\\', '/');
            boolean bHasScheme = strNormalized.startsWith("http://")
                    || strNormalized.startsWith("https://")
                    || strNormalized.startsWith("ftp://");
            if (!bHasScheme)
            {
                strNormalized = "http://" + strNormalized;
            }
            this.strurl = strNormalized;
            // ------------------------------------------------------
            // Open the connection.
            establishConnection();
        }
        catch (Exception e)
        {
            System.out.println("ERROR:-->" + e); // report the crawler error
        }
        finally
        {
            controler.nThreadCount--;
        }
    }

    /**
     * Program entry point: starts one worker on the seed URL and adds that URL to
     * {@code controler.URLList}.
     *
     * @param args unused
     * @throws Exception declared for compatibility; failures are caught and printed
     */
    public static void main(String[] args) throws Exception
    {
        // Enter the seed URL here.
        try
        {
            gather gg = new gather();
            gg.strurl = "http://www.ivsky.com/";
            gg.bSaveToFile = false;
            gg.start();
            controler.URLList.add(gg.strurl);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}
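// (Residue from the web code viewer this file was copied from; not part of the program.)
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -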