?? videoinfoparser.java
字號:
package cn.myvideosite.exe.parser;
import java.util.Date;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.ChannelClass;
import cn.myvideosite.data.model.bean.UserInfo;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.ChannelService;
import cn.myvideosite.data.model.services.VideoInfoService;
import cn.myvideosite.util.HttpUtil;
import cn.myvideosite.util.MySuperDate;
public class VideoInfoParser {
/*
* Parses video information.
* The disabled filter below targeted the <div id="videoInfo_con"> container.
*/
/* private static final NodeFilter FILTER_DIV_VIDEOINFO=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("id","videoInfo_con"));*/
/**
* Matches {@code <dl class="uInfo">} elements, which carry the video uploader's user information.
* Note that, despite the DIV in its name, this filter targets the {@code dl} tag.
*/
private static final NodeFilter FILTER_DIV_UINFO=
new AndFilter(new TagNameFilter("dl"),new HasAttributeFilter("class","uInfo"));
/**
* Matches {@code <ul class="vInfo">} elements, which carry the video information list.
*/
private static final NodeFilter FILTER_UL_VINFO=
new AndFilter(new TagNameFilter("ul"),new HasAttributeFilter("class","vInfo"));
/**
* Matches every {@code <span>} tag. {@code parse} applies it inside the user-information
* block and reads the upload time from the span at index 3.
*/
private static final NodeFilter FILTER_SPAN=new TagNameFilter("span");
/**
* Matches every {@code <li>} tag. {@code parse} applies it inside the video-information
* list, reading the channel from item index 1 and the introduction from item index 3.
*/
private static final NodeFilter FILTER_LI=new TagNameFilter("li");
/**
* Matches every {@code <a>} tag. {@code parse} uses it to find the uploader's member-space
* link and the channel link.
*/
private static final NodeFilter FILTER_A=new TagNameFilter("a");
/**
* Matches the page's {@code <title>} tag. {@code parse} reads the text of the first match
* to decide whether, and along which branch, the page is parsed.
*/
private static final NodeFilter FILTER_TITLE=new TagNameFilter("title");
/**
* Matches {@code <div class="mid clearfix">} elements.
* According to the original note this is used when scraping presenter ("beauty host") pages;
* that note also mentions {@code <div class="uinfo clearfix">} and {@code <div class="morevinfo">}
* as related containers.
*/
private static final NodeFilter FILTER_DIV_MIDCLEAR=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","mid clearfix"));
/**
* Matches every {@code <p>} tag.
*/
private static final NodeFilter FILTER_P=new TagNameFilter("p");
/**
* Matches {@code <div class="morevinfo">} elements, which carry the video introduction.
*/
private static final NodeFilter FILTER_DIV_MOREVINFO=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","morevinfo"));
/**
* Matches every {@code <input>} tag; the original note gives
* {@code <input id="input_page_url" ...>} as the element of interest.
*/
private static final NodeFilter FILTER_INPUT=new TagNameFilter("input");
/**
* Matches {@code <div class="albumimg">} elements, which carry the albums related to the video.
*/
private static final NodeFilter FILTER_DIV_ALBUMIMG=
new AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","albumimg"));
/*
* Disabled filter for <img> tags (declaration commented out below).
*/
//private static final NodeFilter FILTER_IMG=new TagNameFilter("img");
/**
* Matches {@code <p class="more">} elements.
*/
private static final NodeFilter FILTER_P_MORE=
new AndFilter (new TagNameFilter("p"),new HasAttributeFilter("class","more"));
/**
* Matches every {@code <h1>} tag. {@code parse} reads the video title from the first match
* when the page title is not one of the standard 56.com video titles.
*/
private static final NodeFilter FILTER_H1=new TagNameFilter("h1");
/**
* Matches every {@code <h2>} tag. {@code parse} inspects the text of the first match to
* recognise director-information and similar pages, for which it returns {@code null}.
*/
private static final NodeFilter FILTER_H2=new TagNameFilter("h2");
/*
* Disabled filter for <dl class="uinfo"> elements (declaration commented out below).
*/
/*private static final NodeFilter FILTER_P_CONTENT=
new AndFilter (new TagNameFilter("dl"),new HasAttributeFilter("class","uinfo"));*/
/*
* Disabled filter for <div class="vinfo"> elements (declaration commented out below);
* the original note also mentions the <dd> tags inside them.
*/
/*private static final NodeFilter FILTER_DIV_VINFO=
new AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","vinfo"));*/
/*
* Disabled filter for the <dd> tags of the director information
* (declaration commented out below).
*/
//private static final NodeFilter FILTER_DD=new TagNameFilter("dd");
/**
* Matches {@code <div class="title3">} elements, which carry the video title.
*/
private static final NodeFilter FILTER_DIV_TITLE3=
new AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","title3"));
public static VideoInformation parse(String url){
VideoInformation videoinfo = VideoInfoService.findByFlashAddr(url);
if( videoinfo != null) return videoinfo;
String page=HttpUtil.request(url, Constant.CHARSET_GB2312);
if(page !=null){
Parser pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
try {
NodeList titleNL=pageParser.parse(FILTER_TITLE); //判斷標題是否為 56網 .........
String str=titleNL.elementAt(0).getChildren().elementAt(0).getText();
//System.out.println(str);
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312); //判斷是否為導演信息 或是雷區
NodeList h2NL=pageParser.parse(FILTER_H2);
if(h2NL !=null && h2NL.size()>0){
String str1=h2NL.elementAt(0).getChildren().elementAt(0).getText();
if(str1.equals("導演視頻信息") || str1.equals("今天你被雷到了嗎?") || str1.equals("用戶評論"))
{return null;}
//System.out.println(str1);
}
if(str.equals("56網") || str.equals("56") || str==null || str.equals("56tv - 播放") || str.equals("白領公寓[21].56tv - 播放")) {return null;}
videoinfo = new VideoInformation();
if(str.equals("56網視頻") || str.equals("56.com 視頻")){
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList uinfoNL=pageParser.parse(FILTER_DIV_UINFO);
titleParse( page , videoinfo); //保存 標題
if(uinfoNL !=null && uinfoNL.size()>0){
for(int i=0;i<uinfoNL.size();i++){
Node uinfoNode=uinfoNL.elementAt(i);
pageParser=Parser.createParser(uinfoNode.toHtml(), Constant.CHARSET_GB2312);
NodeList aNL=pageParser.parse(FILTER_A);
if(aNL !=null && aNL.size()>0){
LinkTag link=(LinkTag) aNL.elementAt(0);
System.out.println("會員空間地址:"+link.getLink());
UserInfo userInfo=UserInfoParser.parse(link.getLink());
if(userInfo !=null ){//會員空間地址
videoinfo.setUserId(userInfo.getUserId());
} else{
videoinfo.setUserId(0);
}
}
pageParser=Parser.createParser(uinfoNode.toHtml(), Constant.CHARSET_GB2312);
NodeList spanNL=pageParser.parse(FILTER_SPAN); //上傳時間
if(spanNL !=null && spanNL.size()>0){
Node spanNode=spanNL.elementAt(3);
if(spanNode != null){
videoinfo.setUploadTime(new MySuperDate(spanNode.getFirstChild().getText(), false).getDate());
System.out.println("上傳時間:"+spanNode.getFirstChild().getText());
}else{
videoinfo.setUploadTime(new Date());
}
}
}
}
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList vinfoNL=pageParser.parse(FILTER_UL_VINFO);
if(vinfoNL !=null && vinfoNL.size()>0 ){
for(int i=0;i<vinfoNL.size();i++){
Node vinfoNode=vinfoNL.elementAt(i);
pageParser=Parser.createParser(vinfoNode.toHtml(), Constant.CHARSET_GB2312);
NodeList liNL=pageParser.parse(FILTER_LI);
if(liNL !=null && liNL.size()>0 ){
Node liNode=liNL.elementAt(1); // 頻道
if(liNode !=null && !liNode.equals("")){
pageParser=Parser.createParser(liNode.toHtml(), Constant.CHARSET_GB2312); // ?
NodeList aNL=pageParser.parse(FILTER_A);
if(aNL !=null && aNL.size()>0){
LinkTag link=(LinkTag) aNL.elementAt(0);
System.out.println("頻道:"+link.getLinkText());
ChannelClass channel = ChannelService.findByChannelName(link.getLinkText());
if(channel != null){
videoinfo.setChannelId(channel.getChannelId());
} else{
videoinfo.setChannelId(0);
}
}
}
Node liNode3=liNL.elementAt(3); //視頻簡介
if(liNode3 !=null && !liNode3.equals("")){
videoinfo.setIntroduction(liNode3.getChildren().elementAt(1).getText());
System.out.println("視頻簡介:"+liNode3.getChildren().elementAt(1).getText());
}
}
}
}
}else{
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList h1NL=pageParser.parse(FILTER_H1);
if(h1NL != null && h1NL.size()>0){
Node h1Node = h1NL.elementAt(0);
if( h1NL != null )
videoinfo.setVideoTitle(h1Node.getLastChild().toPlainTextString());
System.out.println("標題:"+h1Node.getLastChild().toPlainTextString());
}
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList midClearNL=pageParser.parse(FILTER_DIV_MIDCLEAR);
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -