// spider.java
// (Residue from the web code viewer this file was copied from: "Font size:" control.)
import java.net.*;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.*;
import java.io.IOException;
import java.sql.*;
/**
 * Scrapes table cells from a Yahoo Finance "historical prices" page for one
 * company symbol, writes them to a tab-separated text file and bulk-loads
 * that file into a MySQL table.
 *
 * <p>Typical flow (see {@link #main(String[])}): read the company symbol from
 * standard input, {@link #alllink()} to download and parse every result page,
 * {@link #insertdb()} to write the text file, {@link #inserintodb()} to load
 * it into the database.
 *
 * <p>The class keeps mutable state and is not designed for concurrent use.
 */
public class Spider
{
    /** Number of scraped cells that are grouped into one output line. */
    private static final int COLUMNS_PER_ROW = 7;

    /** Step between the "y=" offsets of consecutive result pages. */
    private static final int PAGE_SIZE = 66;

    /** File the scraped rows are written to by {@link #insertdb()}. */
    private static final String OUTPUT_FILE = "E:\\stockdate.txt";

    /** Matches one right-aligned table cell; group 1 is the cell text. */
    private static final Pattern CELL_PATTERN =
            Pattern.compile("align=\"right\">(.*?)</td>");

    /** Matches a "y=" paging offset inside a link; group 1 is the offset text. */
    private static final Pattern OFFSET_PATTERN = Pattern.compile("y=(.*?)\">");

    /** English month abbreviations, index 0 = January. */
    private static final String[] MONTHS = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    };

    /** URL of the web page to scrape (base URL plus company symbol). */
    String sourceURL;

    /** Content of the most recently downloaded page, or null if the download failed. */
    String sourceContent;

    /** Company symbol; shared by all instances and set by {@link #main(String[])}. */
    static String company;

    /** Cell texts extracted from the downloaded pages, in page order. */
    ArrayList<String> matchContent = new ArrayList<String>();

    /**
     * Prompts for a company symbol on standard input, then scrapes, exports
     * and loads its data.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        try {
            System.out.print("Please give the company:");
            BufferedReader stdin =
                    new BufferedReader(new InputStreamReader(System.in));
            String line = stdin.readLine();
            // readLine() returns null at end of input; trim stray whitespace
            // so it does not end up inside the request URL.
            company = (line == null) ? null : line.trim();
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (company == null || company.length() == 0) {
            // Without a symbol every later step would operate on "...s=null".
            System.err.println("No company symbol given; nothing to do.");
            return;
        }
        Spider urls21 = new Spider("http://finance.yahoo.com/q/hp?s=");
        urls21.alllink();
        urls21.insertdb();
        urls21.inserintodb();
    }

    /** Creates a spider without a source URL. */
    public Spider()
    {
    }

    /**
     * Creates a spider whose source URL is the given prefix followed by the
     * current value of the static {@link #company} field.
     *
     * @param sourceURL1 URL prefix to which the company symbol is appended
     */
    public Spider(String sourceURL1)
    {
        sourceURL = sourceURL1 + company;
    }

    /**
     * Downloads the page at the given URL and stores it in
     * {@link #sourceContent} with all lines concatenated (line terminators
     * are dropped). If the download fails the stack trace is printed and
     * {@link #sourceContent} is left as {@code null}, so that a stale page is
     * never parsed a second time.
     *
     * @param URLStr URL of the page to download
     */
    public void getSourceContent(String URLStr)
    {
        sourceContent = null;
        StringBuilder page = new StringBuilder();
        BufferedReader reader = null;
        try {
            URL pageUrl = new URL(URLStr);
            reader = new BufferedReader(
                    new InputStreamReader(pageUrl.openStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            sourceContent = page.toString();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Extracts the text of every right-aligned table cell from
     * {@link #sourceContent}, prints it and appends it to
     * {@link #matchContent}. Does nothing when no page is loaded.
     *
     * <p>NOTE(review): the first match on each page is deliberately consumed
     * and not recorded, exactly as the original code did. The 7-cell row
     * grouping in {@link #insertdb()} appears to rely on this; confirm
     * against the real page before changing it.
     */
    public void getMatchContent()
    {
        if (sourceContent == null) {
            return;
        }
        Matcher cells = CELL_PATTERN.matcher(sourceContent);
        if (!cells.find()) {
            return;
        }
        while (cells.find()) {
            String value = cells.group(1);
            System.out.println(value);
            matchContent.add(value);
        }
    }

    /**
     * Downloads and parses the first result page, then derives the remaining
     * page URLs from its "Next" links and downloads and parses each of them.
     *
     * <p>The first "Next" link supplies the query-string prefix up to and
     * including "y="; the text captured by the second "Next" match is scanned
     * for "y=" offsets and the last numeric one is taken as the final offset.
     * Pages are then requested at offsets 66, 132, ... up to that value.
     */
    public void alllink()
    {
        getSourceContent(sourceURL);
        getMatchContent();
        if (sourceContent == null) {
            return; // first page could not be downloaded; nothing to follow
        }

        String link = null;
        int last = 0;
        int seen = 0;
        // Quote the symbol so characters such as '.' or '^' are matched literally.
        Pattern nextLink = Pattern.compile(
                "<a href=\"/q/hp\\?s=" + Pattern.quote(String.valueOf(company))
                + "(.*?)\">Next</a>");
        Matcher nextMatcher = nextLink.matcher(sourceContent);
        while (nextMatcher.find()) {
            seen++;
            String tail = nextMatcher.group(1);
            if (seen == 1) {
                String query = tail.replace("amp;", "");
                int yPos = query.indexOf("y=");
                if (yPos >= 0) {
                    link = query.substring(0, yPos + 2);
                }
            } else {
                Matcher offsets = OFFSET_PATTERN.matcher(tail);
                while (offsets.find()) {
                    try {
                        last = Integer.parseInt(offsets.group(1));
                    } catch (NumberFormatException ignored) {
                        // Not a paging offset; keep the last numeric value seen.
                    }
                }
                // Only the first two matches are ever used.
                break;
            }
        }

        if (link == null) {
            return; // no usable "Next" link, so there is only one page
        }
        for (int page = 1; page <= last / PAGE_SIZE; page++) {
            getSourceContent(sourceURL + link + Integer.toString(page * PAGE_SIZE));
            getMatchContent();
        }
    }

    /**
     * Writes the scraped cells to the output text file, one line per group
     * of seven cells. Each line starts with the company symbol, followed by
     * the first cell converted with {@link #todate(String)} and the remaining
     * six cells, all tab-separated; lines are separated by CR LF with no
     * trailing line break. An empty file is written when nothing was scraped.
     *
     * <p>Despite its name this method does not touch the database; see
     * {@link #inserintodb()}.
     */
    public void insertdb()
    {
        StringBuilder rows = new StringBuilder();
        int index = 0;
        for (String cell : matchContent) {
            int column = index % COLUMNS_PER_ROW;
            if (column == 0) {
                if (index != 0) {
                    rows.append("\r\n");
                }
                rows.append(company).append("\t");
                rows.append(todate(cell)).append("\t");
            } else if (column == COLUMNS_PER_ROW - 1) {
                rows.append(cell); // last cell of the line: no trailing tab
            } else {
                rows.append(cell).append("\t");
            }
            index++;
        }

        BufferedWriter output = null;
        try {
            output = new BufferedWriter(new FileWriter(OUTPUT_FILE));
            output.write(rows.toString());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(output);
        }
    }

    /**
     * Bulk-loads the text file written by {@link #insertdb()} into the
     * {@code yahoostock} table of the local MySQL {@code stock} database.
     * Failures are reported by printing the stack trace.
     *
     * <p>NOTE(review): the database credentials are hard-coded here; consider
     * moving them to configuration.
     */
    public void inserintodb()
    {
        try {
            // Loading the class lets the driver register itself with DriverManager.
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        Connection connection = null;
        Statement stmt = null;
        try {
            connection = DriverManager.getConnection(
                    "jdbc:mysql://localhost/stock", "root", "123456");
            stmt = connection.createStatement();
            String sql = "LOAD DATA LOCAL INFILE 'E://stockdate.txt' INTO TABLE yahoostock";
            // LOAD DATA does not produce a ResultSet, so executeQuery() is
            // the wrong call for it; execute() accepts any statement kind.
            stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Converts a date of the form {@code day-Mon-year} (for example
     * {@code "30-Apr-10"}) into the concatenation year + two-digit month +
     * two-digit day (for example {@code "100430"}).
     *
     * <p>The month abbreviation is matched case-insensitively; an
     * unrecognised month is copied through unchanged. A single-character day
     * is left-padded with a zero so the result has a fixed width.
     *
     * @param date date text with at least three '-'-separated parts
     * @return the reordered date string
     * @throws IllegalArgumentException if {@code date} has fewer than three
     *         '-'-separated parts
     */
    public String todate(String date)
    {
        String[] parts = date.split("-");
        if (parts.length < 3) {
            throw new IllegalArgumentException(
                    "Expected a date like 30-Apr-10 but got: " + date);
        }
        String day = parts[0];
        String month = parts[1];
        String year = parts[2];

        for (int i = 0; i < MONTHS.length; i++) {
            if (MONTHS[i].equalsIgnoreCase(month)) {
                int number = i + 1;
                month = (number < 10 ? "0" : "") + number;
                break;
            }
        }
        if (day.length() == 1) {
            day = "0" + day;
        }
        return year + month + day;
    }

    /** Closes the resource if it is non-null, printing any I/O failure. */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
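// ---- Residue from the web code viewer (not part of the program) ----
// Keyboard shortcuts:
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -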