?? tsinghuaparser.java
字號:
/**
 * Extracts and normalizes the author text held by a book element.
 *
 * <p>The node value of the element's first child is read, every ASCII space is
 * removed, the text is capped at 64 characters, the separators {@code ","},
 * {@code ";"} and {@code "、"} are each replaced by one space, and every
 * occurrence of {@code "等"} is removed.
 *
 * @param bookElement element whose first child carries the author text
 * @return the normalized author string; empty when the element has no children
 *         or its first child has no node value
 */
public String getBookAuthor(Element bookElement) {
    final int maxLength = 64;

    String bookAuthor = null;
    if (bookElement.hasChildNodes()) {
        bookAuthor = bookElement.getFirstChild().getNodeValue();
    }
    if (bookAuthor == null) {
        // getNodeValue() is null for non-text nodes (e.g. a nested element);
        // treat that the same as "no author text" instead of failing later.
        return "";
    }

    bookAuthor = bookAuthor.replace(" ", "");
    if (bookAuthor.length() > maxLength) {
        // Keep exactly maxLength characters so the cut-off agrees with the check.
        bookAuthor = bookAuthor.substring(0, maxLength);
    }

    // NOTE(review): the comma replacement used to appear twice in a row; one of
    // the two was possibly meant for a different (full-width) comma - confirm.
    bookAuthor = bookAuthor.replace(",", " ");
    bookAuthor = bookAuthor.replace(";", " ");
    bookAuthor = bookAuthor.replace("、", " ");
    bookAuthor = bookAuthor.replace("等", "");
    return bookAuthor;
}
/**
 * Returns the publication-time text of a book element with every ASCII space
 * removed.
 *
 * @param bookElement element whose first child carries the publication time
 * @return the text without spaces; empty when the element has no children or
 *         its first child has no node value
 */
public String getBookPublishTime(Element bookElement) {
    if (!bookElement.hasChildNodes()) {
        return "";
    }
    String bookPublishTime = bookElement.getFirstChild().getNodeValue();
    if (bookPublishTime == null) {
        // The first child is not a text node (e.g. a nested element).
        return "";
    }
    return bookPublishTime.replace(" ", "");
}
/**
 * Returns the price text of a book element with every ASCII space removed.
 *
 * @param bookElement element whose first child carries the price text
 * @return the text without spaces; empty when the element has no children or
 *         its first child has no node value
 */
public String getBookPrice(Element bookElement) {
    if (!bookElement.hasChildNodes()) {
        return "";
    }
    String bookPrice = bookElement.getFirstChild().getNodeValue();
    if (bookPrice == null) {
        // The first child is not a text node (e.g. a nested element).
        return "";
    }
    return bookPrice.replace(" ", "");
}
/**
 * Returns the ISBN text of a book element with every ASCII space removed.
 *
 * @param bookElement element whose first child carries the ISBN text
 * @return the text without spaces; empty when the element has no children or
 *         its first child has no node value
 */
public String getBookISBN(Element bookElement) {
    if (!bookElement.hasChildNodes()) {
        return "";
    }
    String bookISBN = bookElement.getFirstChild().getNodeValue();
    if (bookISBN == null) {
        // The first child is not a text node (e.g. a nested element).
        return "";
    }
    return bookISBN.replace(" ", "");
}
/**
 * Builds the detail-page URL of a book from the link nested in its element.
 *
 * <p>The first child of {@code bookElement} must be an element carrying an
 * {@code href} attribute; that attribute value is appended to
 * {@code http://www.tup.tsinghua.edu.cn/book/} and every ASCII space is
 * removed from the result.
 *
 * @param bookElement element whose first child is expected to be the link
 * @return the assembled URL, or an empty string when there is no first child,
 *         the first child is not an element, or it has no {@code href}
 */
public String getBookUrl(Element bookElement) {
    if (!bookElement.hasChildNodes()) {
        return "";
    }
    Node firstChild = bookElement.getFirstChild();
    if (firstChild.getNodeType() != Node.ELEMENT_NODE) {
        return "";
    }
    Element link = (Element) firstChild;
    if (!link.hasAttribute("href")) {
        return "";
    }
    String absoluteUrl = "http://www.tup.tsinghua.edu.cn/book/" + link.getAttribute("href");
    return absoluteUrl.replace(" ", "");
}
/**
 * Not implemented in this parser: the argument is ignored.
 *
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookDetailMesg(Element bookElement) {
    return null;
}
/**
 * Not implemented in this parser: the argument is ignored.
 *
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookContent(Element bookElement) {
    return null;
}
/**
 * Not implemented in this parser: the argument is ignored.
 *
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookImage(Element bookElement) {
    return null;
}
/**
 * Not implemented in this parser: the argument is ignored.
 *
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookPublisher(Element bookElement) {
    return null;
}
/**
 * Not implemented in this parser: the argument is ignored.
 *
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookDiscount(Element bookElement) {
    return null;
}
/**
 * Not implemented in this parser: the argument is ignored.
 *
 * @param bookElement unused
 * @return always {@code null}
 */
public String getBookFixPrice(Element bookElement) {
    return null;
}
/**
 * Searches a parsed result page for the "next page" link.
 *
 * <p>Every {@code table} element whose {@code top} attribute equals
 * {@code "40"} is inspected. Within such a table only the first {@code TR}
 * child is scanned: for each of its {@code TD} children and each {@code A}
 * child of those cells, the first {@code IMG} child whose {@code alt} equals
 * the next-page label identifies the link. When several links qualify, the
 * one visited last determines the result.
 *
 * @param doc parsed page to search
 * @return {@code "http://www.tup.tsinghua.edu.cn"} followed by the link's
 *         {@code href}, or the literal {@code "no"} when no link is found
 */
public String getNextPageUrl(Document doc) {
    String pageUrl = "no";
    NodeList tables = doc.getElementsByTagName("table");
    for (int t = 0; t < tables.getLength(); t++) {
        Node tableNode = tables.item(t);
        if (tableNode.getNodeType() != Node.ELEMENT_NODE
                || !"40".equals(((Element) tableNode).getAttribute("top"))) {
            continue;
        }

        // Only the first TR child is examined; later rows are never visited.
        Node firstRow = tableNode.getFirstChild();
        while (firstRow != null
                && !(firstRow.getNodeType() == Node.ELEMENT_NODE && "TR".equals(firstRow.getNodeName()))) {
            firstRow = firstRow.getNextSibling();
        }
        if (firstRow == null) {
            continue;
        }

        for (Node cell = firstRow.getFirstChild(); cell != null; cell = cell.getNextSibling()) {
            if (cell.getNodeType() != Node.ELEMENT_NODE || !"TD".equals(cell.getNodeName())) {
                continue;
            }
            for (Node anchor = cell.getFirstChild(); anchor != null; anchor = anchor.getNextSibling()) {
                if (anchor.getNodeType() != Node.ELEMENT_NODE || !"A".equals(anchor.getNodeName())) {
                    continue;
                }
                for (Node image = anchor.getFirstChild(); image != null; image = image.getNextSibling()) {
                    // The alt text below is the label meaning "show next page".
                    if (image.getNodeType() == Node.ELEMENT_NODE
                            && "IMG".equals(image.getNodeName())
                            && "顯示下一頁".equals(((Element) image).getAttribute("alt"))) {
                        pageUrl = "http://www.tup.tsinghua.edu.cn" + ((Element) anchor).getAttribute("href");
                        break; // stop scanning this anchor's children only
                    }
                }
            }
        }
    }
    return pageUrl;
}
/**
 * Derives a record count from the "last page" link of a parsed result page.
 *
 * <p>Every {@code table} element whose {@code top} attribute equals
 * {@code "40"} is inspected. Within such a table only the first {@code TR}
 * child is scanned: for each of its {@code TD} children and each {@code A}
 * child of those cells, the first {@code IMG} child whose {@code alt} equals
 * the last-page label identifies the link. The run of digits that follows
 * {@code "page="} in the link's {@code href} is read as the page number and
 * the result is {@code (page - 1) * 20}. Presumably 20 is the number of
 * records per page - confirm against the site.
 *
 * <p>Anything after the digits (for example {@code "&keyword=java"}) is
 * ignored, and a link without a readable page number leaves the count
 * unchanged instead of raising {@link NumberFormatException}.
 *
 * @param doc parsed page to search
 * @return the derived count, or {@code 0} when no usable link is found
 */
public long getRecordNum(Document doc) {
    long num = 0;
    NodeList tables = doc.getElementsByTagName("table");
    for (int t = 0; t < tables.getLength(); t++) {
        Node tableNode = tables.item(t);
        if (tableNode.getNodeType() != Node.ELEMENT_NODE
                || !"40".equals(((Element) tableNode).getAttribute("top"))) {
            continue;
        }

        // Only the first TR child is examined; later rows are never visited.
        Node firstRow = tableNode.getFirstChild();
        while (firstRow != null
                && !(firstRow.getNodeType() == Node.ELEMENT_NODE && "TR".equals(firstRow.getNodeName()))) {
            firstRow = firstRow.getNextSibling();
        }
        if (firstRow == null) {
            continue;
        }

        for (Node cell = firstRow.getFirstChild(); cell != null; cell = cell.getNextSibling()) {
            if (cell.getNodeType() != Node.ELEMENT_NODE || !"TD".equals(cell.getNodeName())) {
                continue;
            }
            for (Node anchor = cell.getFirstChild(); anchor != null; anchor = anchor.getNextSibling()) {
                if (anchor.getNodeType() != Node.ELEMENT_NODE || !"A".equals(anchor.getNodeName())) {
                    continue;
                }
                for (Node image = anchor.getFirstChild(); image != null; image = image.getNextSibling()) {
                    // The alt text below is the label meaning "show last page".
                    if (image.getNodeType() != Node.ELEMENT_NODE
                            || !"IMG".equals(image.getNodeName())
                            || !"顯示最后一頁".equals(((Element) image).getAttribute("alt"))) {
                        continue;
                    }
                    String href = ((Element) anchor).getAttribute("href");
                    int marker = href.indexOf("page=");
                    if (marker != -1) {
                        int start = marker + "page=".length();
                        int end = start;
                        // Read only the digits so trailing query parameters do not break parsing.
                        while (end < href.length() && Character.isDigit(href.charAt(end))) {
                            end++;
                        }
                        if (end > start) {
                            try {
                                // long arithmetic avoids int overflow before widening
                                num = (Long.parseLong(href.substring(start, end)) - 1) * 20;
                            } catch (NumberFormatException ignored) {
                                // digit run does not fit in a long; keep the previous count
                            }
                        }
                    }
                    break; // stop scanning this anchor's children only
                }
            }
        }
    }
    return num;
}
/*
 * Manual test entry point: hands a search URL to nekohtmlParser (defined
 * elsewhere in this class; presumably it fetches and parses the page) and
 * prints the next-page URL followed by the derived record count.
 */
public static void main(String[] args) throws Exception {
    final String searchUrl = "http://www.tup.com.cn/book/search.asp?keyword=java";
    Tsinghuaparser parser = new Tsinghuaparser();
    Document page = parser.nekohtmlParser(searchUrl);

    String nextPage = parser.getNextPageUrl(page);
    System.out.println(nextPage);

    long recordCount = parser.getRecordNum(page);
    System.out.println(recordCount);
}
}
?? 快捷鍵說明
復制代碼
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -