?? china_pubparser.java

?? 本系統實現了從五個網站上搜索的圖書進行整合后
?? JAVA
?? 第 1 頁 / 共 2 頁
字號:
12 下一頁
package com.booksearch.service.htmlparser;
/************************************************************
FileName: China_pubparser.java
Author: lichao 
Date:11/14/08
Description: 根據檢索關鍵字到www.china-pub.com抽取匹配內容
Class List: China_pubparser
***********************************************************/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.log4j.Logger;
import org.cyberneko.html.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

import com.booksearch.orm.Book;
import com.booksearch.orm.Price;
/**
 * Class: China_pubparser
 * Description: Extracts the content matching a search keyword from www.china-pub.com
 * extends: none
 * implements: HtmlParser<String>
 * @author  li chao
 * @since   11/14/08
 */
public class China_pubparser implements HtmlParser<String> {
   
	//private String url = "http://www.china-pub.com/s/?key1=java" ;
	/* Holds the records of one result page of this site. */
	private ArrayList<Book> list;
	
//	private static final Logger logger;
//	
//	static 
//    {
//        logger = Logger.getLogger(com.booksearch.service.htmlparser.China_pubparser.class);
//    }
	
	/**
	* Function:  nekohtmlParser
	* Description:  Fetches the page at the given URL and parses it with the
	*               NekoHTML parser into a DOM document. The response body is
	*               decoded as gb2312.
	* Calls:  no
	* Called By:  no
	* @param url address of the page to fetch and parse
	* @return Document the DOM document produced by the parser
	* @throws Exception if the URL is malformed, the connection or the read
	*                   fails, or the parser rejects the property or the input
	*/
	public Document nekohtmlParser(String url)throws Exception{
		// Create the HTML parser.
		DOMParser parser = new DOMParser();
		// Set the parser's default-encoding property to UTF-8.
		parser.setProperty(
					"http://cyberneko.org/html/properties/default-encoding",
					"UTF-8");

		URL u = new URL(url);
		/* Open the connection to the source site. */
		URLConnection urlConnection = u.openConnection();
		// Only reads are bounded (30 s); no connect timeout is configured here.
		urlConnection.setReadTimeout(30000);
		/* Wrap the site's byte stream in a character stream decoded as gb2312. */
		BufferedReader inputStream = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(),"gb2312"));
		try {
			parser.parse(new InputSource(inputStream));
			return parser.getDocument();
		} finally {
			// Close the stream on success and on failure so it is not leaked
			// when parsing throws.
			inputStream.close();
		}
	}
	/**
	* Function:  mainService
	* Description:  Scans the script nodes of a parsed page for the "dt" data
	*               array and converts each record of that array into a Book
	*               carrying its Price.
	* Calls:  parseBookRecord()
	* Called By:  no
	* @param doc DOM document of the page to scan
	* @param flag not used by this method
	* @return ArrayList<Book> the books built from the page; empty when no record is found
	* @throws NumberFormatException if a fixed-price or discount value is not numeric
	*/
	public ArrayList<Book> mainService(Document doc,boolean flag) {
		/* At most this many records are read from one data array. */
		final int maxRecords = 20;
		list = new ArrayList<Book>();
		/* Pick out the <script></script> nodes. */
		NodeList servers = doc.getElementsByTagName("script");
		for (int i = 0; i < servers.getLength(); i++) {

			NodeList childNode = servers.item(i).getChildNodes();
			if (childNode.getLength() == 0) {
				continue;
			}
			String tem = childNode.item(0).getNodeValue();
			/* getNodeValue() is null for some node types; skip such nodes
			   instead of failing with a NullPointerException. */
			if (tem == null || tem.indexOf("dt") == -1) {
				continue;
			}
			/* The node holds more than one [] pair, so drop everything before "dt". */
			tem = tem.substring(tem.indexOf("dt"));
			/* Locate the array; the closing bracket is searched after the
			   opening one so a stray earlier "]" cannot break substring(). */
			int open = tem.indexOf("[");
			int close = (open == -1) ? -1 : tem.indexOf("]", open);
			if (close == -1) {
				continue;
			}
			/* tem starts with "dt", so open - 1 is always a valid index. */
			String bookStr = tem.substring(open - 1, close);

			/* Split on "}" into individual records. */
			StringTokenizer st = new StringTokenizer(bookStr, "}");
			/* The last token is never treated as a record (unchanged from the
			   previous behaviour; presumably trailing residue - TODO confirm
			   against real page data), and no more than maxRecords are read. */
			int recordCount = Math.min(st.countTokens() - 1, maxRecords);
			for (int k = 0; k < recordCount; k++) {
				list.add(parseBookRecord(st.nextToken()));
			}
		}
		return list;
	}

	/**
	* Function:  parseBookRecord
	* Description:  Builds one Book (with its Price attached) from a single
	*               comma-separated record of the data array.
	* Calls:  getBookName(),getBookFixPrice(),getBookDiscount(),getBookISBN(),getBookAuthor(),
	*         getBookPublishTime(),getBookPublisher(),getBookImage(),getBookUrl()
	* Called By:  mainService()
	* @param record text of one record, without its closing "}"
	* @return Book the populated book
	* @throws NumberFormatException if a fixed-price or discount value is not numeric
	*/
	private Book parseBookRecord(String record) {
		Book book = new Book();
		Price price = new Price();
		/* Split the record on "," into its fields. */
		StringTokenizer temp = new StringTokenizer(record, ",");

		while (temp.hasMoreElements()) {

			String temStr = temp.nextToken();
			if (temStr.indexOf("sm:") != -1) {
				/* Book name. */
				book.setBookName(getBookName(temStr));
			} else if (temStr.indexOf("dj:") != -1) {
				/* Fixed (list) price. */
				String bookFixPrice = getBookFixPrice(temStr);
				if (null != bookFixPrice && !"".equals(bookFixPrice)) {
					book.setBookFixPrice(Double.valueOf(bookFixPrice.trim()));
				}
			} else if (temStr.indexOf("zk:") != -1) {
				/* Discount and the discounted price derived from it. */
				String bookDiscount = this.getBookDiscount(temStr);
				/* The price is computed only when a discount is present, so a
				   missing value no longer fails while being parsed. */
				if (null != bookDiscount && !"".equals(bookDiscount)) {
					Float discount = Float.valueOf(bookDiscount.trim());
					price.setChina_pubDiscount(discount);
					/* Uses the fixed price stored so far, i.e. relies on the
					   "dj:" field preceding "zk:" in the record. */
					double bookPrice = book.getBookFixPrice() * discount;
					/* Keep two decimals. The symbols are pinned to Locale.US so
					   the formatted text always uses "." and can be parsed back
					   by Double.valueOf regardless of the default locale. */
					DecimalFormat df = new DecimalFormat("####.00", new DecimalFormatSymbols(Locale.US));
					price.setChina_pubPrice(Double.valueOf(df.format(bookPrice)));
				}
			} else if (temStr.indexOf("sh:") != -1) {
				/* ISBN. */
				book.setBookISBN(getBookISBN(temStr));
			} else if (temStr.indexOf("zz:") != -1) {
				/* Author. */
				book.setBookAuthor(getBookAuthor(temStr));
			} else if (temStr.indexOf("cq:") != -1) {
				/* Publication date. */
				String bookPublishTime = getBookPublishTime(temStr);
				if (!"".equals(bookPublishTime) && null != bookPublishTime) {
					book.setBookPublishTime(bookPublishTime);
				}
			} else if (temStr.indexOf("cs:") != -1) {
				/* Publisher. */
				book.setBookPublisher(getBookPublisher(temStr));
			} else if (temStr.indexOf("pd:") != -1) {
				/* Cover image address. */
				book.setBookImage(getBookImage(temStr));
			} else if (temStr.indexOf("th:") != -1) {
				/* Address of the book's detail page, stored on the price. */
				price.setChina_pubUrl(getBookUrl(temStr));
			}
		}
		book.setPrice(price);
		return book;
	}
	public Price getDetailInfo(Document doc) {
		Price price = new Price();
		String bookFixPrice = "";
		/*過濾出<script></script>結點*/
		NodeList servers = doc.getElementsByTagName("script");
		for (int i = 0; i < servers.getLength(); i++) {
			
			Node node = servers.item(i);
			NodeList childNode = node.getChildNodes();
			if (childNode.getLength() > 0
					&& (childNode.item(0).getNodeValue().indexOf("dt") != -1)) {
				
				String tem = childNode.item(0).getNodeValue();
				/*因為結點中不止有一個[]，所以要先把第一二個[]過濾掉*/
				tem = tem.substring(tem.indexOf("dt"));
				/*過濾出數據組中的元素*/
				if(tem.indexOf("[") != -1&& tem.indexOf("]")!= -1){
					
					String bookStr = tem.substring(tem.indexOf("[")-1, tem
							.indexOf("]"));
					
                    if(bookStr.indexOf("{")!= -1&&bookStr.indexOf("}")!= -1){
                    	
                      String priceStr = bookStr.substring(bookStr.indexOf("{") + 1, bookStr.indexOf("}"));
					  /*按","進行分詞*/
	                  StringTokenizer temp =new StringTokenizer(priceStr,",");
	              
	                  while(temp.hasMoreElements()){
	                	  
	                	 String temStr = temp.nextToken();
                         if(temStr.indexOf("dj:")!=-1){
	                		  bookFixPrice = getBookFixPrice(temStr);
	                	   /*取出圖書折扣和打折后的價格*/
	                	  }else if(temStr.indexOf("zk:")!=-1){
	                		  String bookDiscount = this.getBookDiscount(temStr);
                              //System.out.println(bookDiscount);
	                		  price.setChina_pubDiscount(Float.valueOf(bookDiscount.trim()));
	                		  /*求出本網站普通會員的買書價格*/
	                		  double bookPrice = Double.valueOf(bookFixPrice)*Double.valueOf(bookDiscount.trim()); 
//	                		  DecimalFormat df=(DecimalFormat)DecimalFormat.getInstance();
//	              			  df.setMaximumFractionDigits(2);
	                		  DecimalFormat df = new DecimalFormat("####.00");     
	                		  price.setChina_pubPrice(Double.valueOf(df.format(bookPrice)));
	                		  //price.setChina_pubPrice(bookPrice);
	                		  //System.out.println(bookPrice);
	                	  }else if(temStr.indexOf("th:")!=-1){
	                		  String bookUrl = getBookUrl(temStr);

		                	   price.setChina_pubUrl(bookUrl);
	                		   //System.out.println(bookUrl);
		                 }
	                  }
					}
				}
12 下一頁
?? 文件大小 22867 K
?? 上傳用戶 moxcki
?? 所屬分類 JavaScript
??? 相關標簽

#搜索 #圖書 #網站
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? china_pubparser.java

?? 快捷鍵說明