// File: totalvideoaddr.java
// (Code-viewer page residue: "Font size:" control label.)
package cn.myvideosite.exe.parser;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.VideoInfoService;
import cn.myvideosite.exception.DownloadException;
import cn.myvideosite.util.FileUtil;
import cn.myvideosite.util.HttpUtil;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
/**
 * Crawls search-result pages of so.56.com and stores the videos found on them.
 *
 * <p>For each search keyword the result pages are requested one after another. On every page
 * the {@code <div class="video">} elements are scanned for {@code <span>} entries; the links
 * of an entry are handed to {@code VideoInfoParser}, its images are downloaded through
 * {@code FileUtil}, and a successfully parsed entry is saved with {@code VideoInfoService}.
 */
public class TotalVideoAddr {

    /** Matches every {@code <a>} tag. */
    private static final NodeFilter FILTER_DIV_A = new TagNameFilter("a");

    /** Matches {@code <div class="video">}, which wraps the {@code <span>} result entries. */
    private static final NodeFilter FILTER_DIV_VIDEO =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "video"));

    /** Matches every {@code <span>} tag, e.g. {@code <span id=s_Mzc5NTUxMzQ>}. */
    private static final NodeFilter FILTER_DIV_SPAN = new TagNameFilter("span");

    /** Matches every {@code <img>} tag. */
    private static final NodeFilter FILTER_DIV_IMG = new TagNameFilter("img");

    /** Search URL prefix; the URL-encoded keyword is appended to it. */
    private static final String INDEX_URL = "http://so.56.com/index?type=video&key=";

    /** Charset used to URL-encode the search keywords. */
    private static final String KEYWORD_CHARSET = "GBK";

    /** Number of result pages requested per search URL. */
    private static final int PAGE_COUNT = 99;

    /** Increment of the {@code startat} query parameter between two consecutive pages. */
    private static final int PAGE_STEP = 10;

    /**
     * Crawls the search results for a fixed list of keywords.
     *
     * <p>An earlier, now removed, implementation downloaded the channel page at {@code url},
     * collected the links inside {@code <p class="tab">} and searched for each link text.
     * The current implementation ignores {@code url} and uses the hard-coded keywords below.
     *
     * @param url channel page address; currently unused
     */
    private static void channelParse(String url) {
        // The trailing space of the first keyword is kept exactly as in the original list.
        String[] keywords = {"封神榜 ", "大話西游", "武林外傳", "拳皇"};
        try {
            for (int i = 0; i < keywords.length; i++) {
                pages(INDEX_URL + URLEncoder.encode(keywords[i], KEYWORD_CHARSET));
            }
        } catch (UnsupportedEncodingException e) {
            // An unsupported charset aborts the remaining keywords as well.
            e.printStackTrace();
        }
    }

    /**
     * Crawls one result page: parses the video links of every entry, downloads the entry's
     * images and saves each entry for which video information could be parsed.
     *
     * <p>If an entry contains several links, each one is parsed in turn and the result of the
     * last link is the one that is kept (and it may be {@code null}, in which case nothing is
     * saved for that entry).
     *
     * @param url address of the result page to crawl
     */
    public static void childrenParse(String url) {
        String page = HttpUtil.request(url, Constant.CHARSET_GB2312);
        if (page == null) {
            return;
        }
        try {
            NodeList videoDivs = parseFragment(page, FILTER_DIV_VIDEO);
            if (videoDivs == null || videoDivs.size() == 0) {
                return; // nothing to crawl on this page
            }
            NodeList spans = parseFragment(videoDivs.toHtml(), FILTER_DIV_SPAN);
            if (spans == null) {
                return;
            }
            int linkCounter = 1; // running number printed next to each link of this page
            for (int i = 0; i < spans.size(); i++) {
                String spanHtml = spans.elementAt(i).toHtml();
                VideoInformation videoInfo = null;

                NodeList links = parseFragment(spanHtml, FILTER_DIV_A);
                for (int j = 0; links != null && j < links.size(); j++) {
                    Node linkNode = links.elementAt(j);
                    if (!(linkNode instanceof LinkTag)) {
                        continue; // same defensive check the image handling uses
                    }
                    String href = ((LinkTag) linkNode).getLink();
                    System.out.println("==============" + href + (linkCounter++) + "==================");
                    videoInfo = VideoInfoParser.parse(href);
                    if (videoInfo != null) {
                        videoInfo.setFlashAddress(href);
                    }
                }

                if (videoInfo != null) {
                    attachImages(videoInfo, spanHtml);
                    VideoInfoService.save(videoInfo);
                }
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    /**
     * Requests {@link #PAGE_COUNT} consecutive result pages of one search URL, appending
     * {@code &startat=0}, {@code &startat=10}, ... to it, and crawls each of them.
     *
     * @param url search URL without the {@code startat} parameter
     */
    public static void pages(String url) {
        for (int pageIndex = 0; pageIndex < PAGE_COUNT; pageIndex++) {
            childrenParse(url + "&startat=" + PAGE_STEP * pageIndex);
        }
    }

    /**
     * Starts the crawl.
     *
     * @param args command-line arguments; unused
     */
    public static void main(String[] args) {
        channelParse("http://www.56.com/w/show_channel.phtml");
    }

    /** Parses {@code html} and returns all nodes accepted by {@code filter}. */
    private static NodeList parseFragment(String html, NodeFilter filter) throws ParserException {
        return Parser.createParser(html, Constant.CHARSET_GB2312).parse(filter);
    }

    /**
     * Records and downloads every image found in {@code spanHtml}. For each image the source
     * URL is stored on {@code videoInfo}, the image is downloaded, and the resulting new
     * address (or an empty string if the download fails) is stored as well; later images
     * overwrite the values of earlier ones.
     */
    private static void attachImages(VideoInformation videoInfo, String spanHtml)
            throws ParserException {
        NodeList images = parseFragment(spanHtml, FILTER_DIV_IMG);
        if (images == null) {
            return;
        }
        for (int i = 0; i < images.size(); i++) {
            Node node = images.elementAt(i);
            if (!(node instanceof ImageTag)) {
                continue; // also skips null nodes
            }
            String imageUrl = ((ImageTag) node).getImageURL();
            videoInfo.setSoureUrl(imageUrl);
            System.out.println("圖片地址:" + imageUrl);
            try {
                String newImg = FileUtil.download(imageUrl);
                videoInfo.setNewUrl(newImg);
                System.out.println("圖片新地址:" + newImg);
            } catch (DownloadException e) {
                videoInfo.setNewUrl("");
                e.printStackTrace();
            }
        }
    }
}
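/*
 * Code-viewer page residue (keyboard shortcut help), kept as a comment so that it does not
 * break compilation:
 *
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */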