?? dangdangparser.java
字號:
* Description: Gets the book's publication time
* Calls: no
* Called By: mainService
* @param bookElement as Element
* @return String
* @throws no
*/
public String getBookPublishTime(Element bookElement){
    // Extracts the year and month that follow the "出版時間" label in the value
    // of the element's first child and returns them as "<year>-<month>-00".
    // Returns null when there is no child, no text value, or the text does not
    // contain the label, "年" and "月" in that order.
    if (!bookElement.hasChildNodes()) {
        return null;
    }
    // getNodeValue() is null when the first child is an element rather than text.
    String text = bookElement.getFirstChild().getNodeValue();
    if (text == null) {
        return null;
    }
    int labelIndex = text.indexOf("出版時間");
    int yearIndex = text.indexOf("年");
    int monthIndex = text.indexOf("月");
    if (labelIndex == -1 || yearIndex == -1 || monthIndex == -1) {
        return null;
    }
    // Skip the 4-character label plus the single separator character after it.
    int yearStart = labelIndex + 5;
    // The markers must appear as label, year, month; otherwise substring()
    // would throw StringIndexOutOfBoundsException.
    if (yearStart > yearIndex || yearIndex >= monthIndex) {
        return null;
    }
    // Only year and month are read from the text; the day is hard-coded to "00".
    return text.substring(yearStart, yearIndex)
            + "-"
            + text.substring(yearIndex + 1, monthIndex)
            + "-00";
}
/**
 * Function: getBookPrice
 * Description: Gets the book's price as plain text. Reads the value of the
 * first child of the element's third child node (index 2) and removes the
 * "¥" sign, spaces and commas from it.
 * Calls: no
 * Called By: mainService
 * @param bookElement element whose third child node holds the price
 * @return the cleaned price text, or an empty string when no price text is found
 */
public String getBookPrice(Element bookElement){
    String bookPrice = "";
    NodeList children = bookElement.getChildNodes();
    if (children.getLength() > 2) {
        Node priceNode = children.item(2);
        if (priceNode.getNodeType() == Node.ELEMENT_NODE && priceNode.hasChildNodes()) {
            // getNodeValue() is null when the first child is an element rather
            // than text; keep the empty default so the replace() calls below
            // cannot throw NullPointerException.
            String rawValue = priceNode.getFirstChild().getNodeValue();
            if (rawValue != null) {
                bookPrice = rawValue;
            }
        }
    }
    bookPrice = bookPrice.replace("¥", "");
    bookPrice = bookPrice.replace(" ", "");
    // NOTE(review): this replacement used to be issued twice with the same
    // literal, so the second call had no effect. Possibly a full-width comma
    // was intended for one of them - confirm against real page data.
    bookPrice = bookPrice.replace(",", "");
    return bookPrice.trim();
}
/**
 * Function: getBookDiscount
 * Description: Gets the book's discount. Concatenates the values of all
 * non-element child nodes, drops everything up to and including the
 * "折扣:" label when present, and turns the text in front of "折" into
 * "0.&lt;text&gt;". Spaces are removed from the result.
 * Calls: no
 * Called By: mainService
 * @param bookElement element whose non-element children hold the discount text
 * @return the discount text, e.g. "0.75" for "折扣:75折"; when no "折" is
 *         present the remaining text is returned with spaces removed
 */
public String getBookDiscount(Element bookElement) {
    StringBuilder rawText = new StringBuilder();
    NodeList children = bookElement.getChildNodes();
    int childCount = children.getLength();
    for (int index = 0; index < childCount; index++) {
        Node child = children.item(index);
        // Element children are skipped; every other node contributes its value.
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            rawText.append(child.getNodeValue());
        }
    }
    String discount = rawText.toString();

    // Keep only what follows the 3-character label.
    int labelAt = discount.indexOf("折扣:");
    if (labelAt != -1) {
        discount = discount.substring(labelAt + 3);
    }

    // Take the text in front of the unit character and prefix it with "0.".
    int unitAt = discount.indexOf("折");
    if (unitAt != -1) {
        discount = "0." + discount.substring(0, unitAt).trim();
    }
    return discount.replace(" ", "");
}
/**
 * Function: getBookFixPrice
 * Description: Gets the book's list price as plain text. Reads the value of
 * the first child of the element's first child (when that first child is an
 * element) and removes the "¥" sign and commas from it.
 * Calls: no
 * Called By: mainService
 * @param bookElement element whose first child element holds the list price
 * @return the cleaned, trimmed price text, or an empty string when no price
 *         text is found
 */
public String getBookFixPrice(Element bookElement) {
    String bookFixPrice = "";
    if (bookElement.hasChildNodes()) {
        Node holder = bookElement.getFirstChild();
        if (holder.getNodeType() == Node.ELEMENT_NODE && holder.hasChildNodes()) {
            // getNodeValue() is null when the nested first child is an element;
            // keep the empty default so replace() below cannot throw.
            String rawValue = holder.getFirstChild().getNodeValue();
            if (rawValue != null) {
                bookFixPrice = rawValue;
            }
        }
    }
    bookFixPrice = bookFixPrice.replace("¥", "");
    // NOTE(review): this replacement used to be issued twice with the same
    // literal, so the second call had no effect. Possibly a full-width comma
    // was intended for one of them - confirm against real page data.
    bookFixPrice = bookFixPrice.replace(",", "");
    // Always trim: the former length()>1 guard left a lone whitespace
    // character in place, unlike getBookPrice which trims unconditionally.
    return bookFixPrice.trim();
}
/**
 * Function: getBookUrl
 * Description: Gets the address of the book's detail page. The "href"
 * attribute of the element's first child is appended to
 * "http://search.dangdang.com/".
 * Calls: no
 * Called By: mainService
 * @param bookElement element whose first child is expected to be the link
 * @return the trimmed address, or an empty string when the first child is
 *         missing, is not an element, or has no "href" attribute
 */
public String getBookUrl(Element bookElement) {
    if (!bookElement.hasChildNodes()) {
        return "";
    }
    Node linkNode = bookElement.getFirstChild();
    if (linkNode.getNodeType() != Node.ELEMENT_NODE) {
        return "";
    }
    Element link = (Element) linkNode;
    if (!link.hasAttribute("href")) {
        return "";
    }
    String address = "http://search.dangdang.com/" + link.getAttribute("href");
    return address.trim();
}
/**
 * Function: getBookContent
 * Description: Gets the book's detailed content. For every child node of the
 * element it collects the node's own value, or - for element children - the
 * value of that child's first child. Spaces are removed and the result is
 * limited to 255 characters.
 * Calls: no
 * Called By: mainService
 * @param bookElement element holding the content nodes
 * @return the collected text without spaces, at most 255 characters long
 *         (trimmed after cutting); empty when nothing was collected
 */
public String getBookContent(Element bookElement) {
    final int maxLength = 255;
    StringBuilder collected = new StringBuilder();
    NodeList children = bookElement.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        // Element children contribute their first child's value (nothing when
        // they have no children); every other node contributes its own value.
        Node valueSource = child;
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            valueSource = child.getFirstChild();
        }
        String value = (valueSource == null) ? null : valueSource.getNodeValue();
        // A null value (e.g. the first child is itself an element) used to be
        // concatenated as the literal text "null"; skip it instead.
        if (value != null) {
            collected.append(value);
        }
    }
    String bookContent = collected.toString().replace(" ", "");
    if (bookContent.length() > maxLength) {
        int end = maxLength;
        // Do not cut between the two halves of a surrogate pair.
        if (Character.isHighSurrogate(bookContent.charAt(end - 1))) {
            end--;
        }
        bookContent = bookContent.substring(0, end).trim();
    }
    return bookContent;
}
/**
 * Function: getNextPageUrl
 * Description: Gets the address of the "next page" link. Looks for a div
 * with id "divBottomPageNavi", then scans the children of that div's first
 * child for an element named "link_page_next" and appends its "href" to
 * "http://search.book.dangdang.com/".
 * Calls: no
 * Called By: no
 * @param doc parsed page
 * @return the next-page address, or "no" when no such link is found
 */
public String getNextPageUrl(Document doc) {
    // "no" means there is no next page.
    String result = "no";
    NodeList divs = doc.getElementsByTagName("div");
    for (int d = 0; d < divs.getLength(); d++) {
        Node divNode = divs.item(d);
        if (divNode.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element div = (Element) divNode;
        // Only the paging toolbar, <div id="divBottomPageNavi" ...>, is of interest.
        if (!"divBottomPageNavi".equals(div.getAttribute("id")) || !div.hasChildNodes()) {
            continue;
        }
        // The links are expected directly under the toolbar's first child.
        NodeList links = div.getFirstChild().getChildNodes();
        for (int k = 0; k < links.getLength(); k++) {
            Node linkNode = links.item(k);
            if (linkNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element link = (Element) linkNode;
            // An <a name="link_page_next" ...> element means another page exists.
            if ("link_page_next".equals(link.getAttribute("name"))) {
                result = "http://search.book.dangdang.com/" + link.getAttribute("href");
                break;
            }
        }
    }
    return result;
}
/**
 * Function: getRecordNum
 * Description: Reads a number from the page. In every div whose class is
 * "l", element children are skipped until the first "SPAN"; for each element
 * child after it, the value of its first child is parsed as a long. The last
 * value parsed wins.
 * NOTE(review): presumably the number of search results - confirm against
 * the page layout.
 * @param doc parsed page
 * @return the parsed number, or 0 when none is found
 * @throws NumberFormatException if a candidate text is non-blank but not a
 *         valid long
 */
public long getRecordNum(Document doc) {
    // Starts at 0, which is also the result when nothing is found.
    long num = 0;
    NodeList divs = doc.getElementsByTagName("div");
    for (int i = 0; i < divs.getLength(); i++) {
        Element div = (Element) divs.item(i);
        if (!"l".equals(div.getAttribute("class"))) {
            continue;
        }
        NodeList children = div.getChildNodes();
        boolean seenSpan = false;
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            if (!seenSpan) {
                // The first SPAN only arms the flag; its own text is not parsed.
                seenSpan = "SPAN".equals(child.getNodeName());
                continue;
            }
            Node first = child.getFirstChild();
            // getNodeValue() is null when the first child is an element.
            String text = (first == null) ? null : first.getNodeValue();
            if (text == null) {
                continue;
            }
            // Trim before the emptiness check so whitespace-only text is
            // skipped instead of failing to parse.
            text = text.trim();
            if (text.length() > 0) {
                num = Long.parseLong(text);
            }
        }
    }
    return num;
}
/**
 * Class: DangdangparserSec
 * Description: Requests a book's detail page by its URL and extracts the
 * book's ISBN from it.
 * extends: no
 * implements: no
 * @author feng guang
 * @since 11/09/08
 */
public class DangdangparserSec{
    /**
     * Function: getBookISBNSec
     * Description: Requests the book detail page at the given URL and reads
     * the ISBN from the first list item under the first
     * {@code <ul class="nostyle">} whose leading text contains the ISBN label.
     * Calls: no
     * Called By: this.mainService(Document doc,boolean flag)
     * @param url address of the book detail page
     * @return the matched text with spaces removed and its first five
     *         characters (the label) dropped; an empty string when no ISBN
     *         item is found
     * @throws Exception if the URL is invalid, the page cannot be read, or
     *         parsing fails
     */
    public String getBookISBNSec(String url) throws Exception{
        String bookISBN = "";
        /* Create the HTML parser. */
        DOMParser parser = new DOMParser();
        /* Set the default encoding used for the page. */
        parser.setProperty(
                "http://cyberneko.org/html/properties/default-encoding",
                "gb2312");
        URL u = new URL(url);
        /* Open the connection to the source site. */
        URLConnection urlconn = u.openConnection();
        urlconn.setReadTimeout(30000);
        /* Read the response bytes as characters, decoded as gb2312. */
        BufferedReader in = new BufferedReader(new InputStreamReader(urlconn
                .getInputStream(), "gb2312"));
        Document doc;
        try {
            /* Parse the page and obtain it as a DOM document. */
            parser.parse(new InputSource(in));
            doc = parser.getDocument();
        } finally {
            /* Always release the connection stream, even when parsing fails. */
            in.close();
        }
        /* Look at all <ul> nodes; only the first one with class "nostyle" is examined. */
        NodeList list = doc.getElementsByTagName("ul");
        for (int i = 0; i < list.getLength(); i++) {
            Element childElement = (Element) list.item(i);
            if (!"nostyle".equals(childElement.getAttribute("class"))) {
                continue;
            }
            NodeList items = childElement.getChildNodes();
            for (int j = 0; j < items.getLength(); j++) {
                Node liNode = items.item(j);
                if (liNode.getNodeType() != Node.ELEMENT_NODE || !liNode.hasChildNodes()) {
                    continue;
                }
                String itemText = liNode.getFirstChild().getNodeValue();
                if (itemText != null && itemText.indexOf("I S B N :") != -1) {
                    bookISBN = itemText;
                    break;
                }
            }
            break;
        }
        bookISBN = bookISBN.replace(" ", "");
        /* With the spaces gone the label is five characters long; drop it. */
        if (bookISBN.length() >= 5) {
            bookISBN = bookISBN.substring(5, bookISBN.length());
        }
        return bookISBN;
    }
}
/**
 * Stub: ignores {@code bookElement} and always returns {@code null}.
 * NOTE(review): presumably a placeholder; the only ISBN lookup visible in
 * this file is DangdangparserSec.getBookISBNSec(String) - confirm whether
 * callers rely on this method.
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookISBN(Element bookElement) {
return null;
}
/**
 * Stub: ignores {@code doc} and always returns {@code null}.
 * NOTE(review): presumably a placeholder that is not implemented for this
 * site - confirm whether callers handle the null result.
 * @param doc unused
 * @return always {@code null}
 */
public Price getDetailInfo(Document doc) {
return null;
}
/**
 * Manual smoke run: parses one fixed search-result page, prints one line
 * per book with its fields separated by ">>", then prints the elapsed time
 * in milliseconds.
 * @param args unused
 * @throws Exception if fetching or parsing the page fails
 */
public static void main(String[] args) throws Exception {
    Dangdangparser dangDang = new Dangdangparser();
    long beginTime = System.currentTimeMillis();
    Document doc = dangDang.nekohtmlParser("http://search.book.dangdang.com/search.aspx?key=java%B1%E0%B3%CC%CB%BC%CF%EB");
    ArrayList<Book> books = dangDang.mainService(doc, true);
    for (Book book : books) {
        System.out.println(book.getBookName()
                + ">>" + book.getBookISBN()
                + ">>" + book.getBookAuthor()
                + ">>" + book.getBookFixPrice()
                + ">>" + book.getBookImage()
                + ">>" + book.getBookPublisher()
                + ">>" + book.getBookPublishTime()
                + ">>" + book.getBookProspectus()
                + ">>" + book.getPrice().getDangdangDiscount()
                + ">>" + book.getPrice().getDangdangPrice()
                + ">>" + book.getPrice().getDangdangUrl());
    }
    System.out.println(System.currentTimeMillis() - beginTime);
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -