?? getcnkireference1.java
字號:
package cn.ac.cintcm.spider.cnki;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import org.htmlparser.Node;
import org.htmlparser.beans.FilterBean;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import cn.ac.cintcm.spider.GetUrlContent;
/**
 * Ad-hoc driver that downloads one CNKI reference-list page, saves the raw
 * response to {@code out2.txt} and prints the reference records found in it.
 *
 * <p>Records are taken from the children of every {@code TD} node returned by
 * {@code NodeFilters.getNodeList(content, "TD", "width", "")}. A child whose
 * {@code toString()} contains the record marker contributes the plain text of
 * its first {@code TABLE} child; a child containing the "more" marker cancels
 * the record that is currently pending and prints the follow-up URL instead.
 */
public class GetCnkiReference1 {

    /** Page fetched by {@link #main(String[])}. */
    private static final String REFERENCE_URL =
            "http://lsg.cnki.net/grid20/cache/cacherefre3.aspx?filename=ZYXB200301015&dbname=CJFD2003";

    /** Prefix put in front of the (shortened) href of a "more" link. */
    private static final String MORE_LINK_BASE = "http://lsg.cnki.net/grid20";

    /** File that receives the raw page content. */
    private static final String DUMP_FILE = "out2.txt";

    /** Text looked for in a node's {@code toString()} to recognise a record paragraph. */
    private static final String RECORD_MARKER = "p style='line-height:150";

    /** Text looked for in a node's {@code toString()} to recognise a "more" link. */
    private static final String MORE_MARKER = ">>更多";

    /**
     * Fetches the reference page, dumps it to {@link #DUMP_FILE} and prints every
     * record (or, for truncated sections, the URL of the "more" page) to stdout.
     *
     * @param args ignored
     * @throws IOException if the dump file cannot be written
     */
    public static void main(String[] args) throws IOException {
        GetUrlContent guc = new GetUrlContent(REFERENCE_URL);
        String content = guc.getContent();
        dumpToFile(content);

        NodeList list = NodeFilters.getNodeList(content, "TD", "width", "");
        for (Node node : list.toNodeArray()) {
            NodeList children = node.getChildren();
            if (children == null) {
                // A TD without children has nothing to report.
                continue;
            }
            printRecords(children);
        }
        System.out.println("suc");
    }

    /**
     * Writes the page content to {@link #DUMP_FILE}, closing the writer even
     * when the write fails.
     */
    private static void dumpToFile(String content) throws IOException {
        BufferedWriter out = new BufferedWriter(new FileWriter(DUMP_FILE));
        try {
            out.write(content);
        } finally {
            out.close();
        }
    }

    /**
     * Walks the children of one TD and prints its records and "more" URLs.
     *
     * <p>A record is held back until the next child shows whether it is
     * followed by a "more" link (record dropped, URL printed) or by another
     * record / the end of the list (record printed).
     */
    private static void printRecords(NodeList children) {
        boolean pending = false;
        String records = null;
        for (Node node1 : children.toNodeArray()) {
            String description = node1.toString();
            if (description.contains(RECORD_MARKER)) {
                if (pending) {
                    System.out.println(records);
                }
                records = extractRecord(node1);
                pending = records != null;
            }
            if (description.contains(MORE_MARKER)) {
                // The pending record is replaced by the link to the full list.
                pending = false;
                String url1 = buildMoreUrl(node1);
                if (url1 != null) {
                    System.out.println(url1);
                } else {
                    System.err.println("Skipped a 'more' link without a usable href");
                }
            }
        }
        // Flush the last record; without this the final entry of each TD is lost.
        if (pending) {
            System.out.println(records);
        }
    }

    /**
     * Returns the plain text of the first TABLE child of the given node with
     * all CR-LF sequences removed, or {@code null} when the node has no
     * TABLE child.
     */
    private static String extractRecord(Node paragraph) {
        NodeList kids = paragraph.getChildren();
        if (kids == null) {
            return null;
        }
        NodeList tableNodes = kids.extractAllNodesThatMatch(NodeFilters.getSingleFilter("TABLE"));
        if (tableNodes.size() == 0) {
            return null;
        }
        return tableNodes.elementAt(0).toPlainTextString().replace("\r\n", "");
    }

    /**
     * Builds the absolute URL of a "more" page from the first child of the
     * given node, or returns {@code null} when no href can be read from it.
     */
    private static String buildMoreUrl(Node moreNode) {
        NodeList kids = moreNode.getChildren();
        if (kids == null || kids.size() == 0) {
            return null;
        }
        // Re-parse the child's HTML as a link tag to get at its attributes.
        LinkTag linkTag = new LinkTag();
        linkTag.setText(kids.elementAt(0).toHtml());
        String href = linkTag.getAttribute("href");
        if (href == null || href.length() < 2) {
            return null;
        }
        // Drops the first two characters of the href before appending it to the
        // base; presumably a leading ".." of a relative path - TODO confirm.
        return MORE_LINK_BASE + href.substring(2);
    }
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -