// TotalAlbumAddr.java
// (Code-viewer residue from the page this file was copied from: "Font size:" control label.)
package cn.myvideosite.exe.parser;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import cn.myvideosite.commons.Constant;
import cn.myvideosite.util.HttpUtil;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
/**
 * Command-line crawler: reads a channel index page, turns each channel link's
 * text into an album-search URL, walks a fixed number of result pages for that
 * search and hands every album link found to {@code AlbumInfoParser.parse}.
 *
 * <p>All failures are reported and the affected page is skipped (best effort).
 */
public class TotalAlbumAddr {

    /** Matches {@code <p class="tab">}; the links inside it are read as channel names. */
    private static final NodeFilter FILTER_DIV_TAB =
            new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("class", "tab"));

    /** Matches every {@code <a>} tag. */
    private static final NodeFilter FILTER_DIV_A = new TagNameFilter("a");

    /**
     * Matches {@code <div class="albumimg">}; its {@code <span>} children
     * (e.g. {@code <span id=s_Mzc5NTUxMzQ>}) are searched for album links.
     */
    private static final NodeFilter FILTER_DIV_VIDEO =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "albumimg"));

    /** Matches every {@code <span>} tag. */
    private static final NodeFilter FILTER_DIV_SPAN = new TagNameFilter("span");

    /** Album-search URL prefix; the URL-encoded channel name is appended as the key. */
    private static final String INDEX_URL = "http://so.56.com/index?type=album&key=";

    /** Character encoding used when URL-encoding the channel name. */
    private static final String KEY_ENCODING = "GBK";

    /**
     * Index of the first channel link that is crawled; links before it are skipped.
     * NOTE(review): the original inline comment on this value read "health" —
     * presumably the channel at which an earlier crawl was resumed. Set to 0 to
     * crawl every channel; confirm the intended value.
     */
    private static final int FIRST_CHANNEL_INDEX = 104;

    /** Highest result-page index requested by {@link #pages(String)} (inclusive). */
    private static final int LAST_PAGE_INDEX = 50;

    /** Increment of the {@code startat} query parameter between consecutive result pages. */
    private static final int PAGE_STEP = 10;

    /**
     * Crawls the channel index page: collects the links inside
     * {@code <p class="tab">} and, for each link from
     * {@link #FIRST_CHANNEL_INDEX} onwards, crawls the album search for the
     * link's text via {@link #pages(String)}.
     *
     * @param url address of the channel index page
     */
    private static void channelParse(String url) {
        String page = HttpUtil.request(url, Constant.CHARSET_GB2312);
        if (page == null) {
            return;
        }
        try {
            NodeList tabNL = Parser.createParser(page, Constant.CHARSET_GB2312).parse(FILTER_DIV_TAB);
            if (tabNL == null || tabNL.size() == 0) {
                return;
            }
            NodeList aNL = Parser.createParser(tabNL.toHtml(), Constant.CHARSET_GB2312).parse(FILTER_DIV_A);
            if (aNL == null) {
                return;
            }
            for (int i = FIRST_CHANNEL_INDEX; i < aNL.size(); i++) {
                LinkTag aLink = (LinkTag) aNL.elementAt(i);
                String channelName = aLink.getLinkText();
                // Build the search URL once and reuse it for logging and crawling.
                String albumSearchUrl = INDEX_URL + URLEncoder.encode(channelName, KEY_ENCODING);
                System.out.println(albumSearchUrl);
                pages(albumSearchUrl);
                System.out.println("****************" + channelName + "下載完畢!!" + "******************");
            }
        } catch (ParserException e) {
            // Best effort: report which page failed and stop processing it.
            System.err.println("Failed to parse channel page: " + url);
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            System.err.println("Unsupported encoding " + KEY_ENCODING + " while crawling: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Crawls one result page of a channel: finds the {@code <span>} elements
     * inside {@code <div class="albumimg">} and passes every non-null link
     * found in them to {@code AlbumInfoParser.parse}.
     *
     * @param url address of the result page
     */
    public static void childrenParse(String url) {
        String page = HttpUtil.request(url, Constant.CHARSET_GB2312);
        if (page == null) {
            return;
        }
        int n = 1; // running counter appended to each logged link
        try {
            NodeList albumDivs = Parser.createParser(page, Constant.CHARSET_GB2312).parse(FILTER_DIV_VIDEO);
            // Nothing matched: skip the re-parse instead of dereferencing a
            // missing result or parsing an empty document.
            if (albumDivs == null || albumDivs.size() == 0) {
                return;
            }
            NodeList spanNL = Parser.createParser(albumDivs.toHtml(), Constant.CHARSET_GB2312).parse(FILTER_DIV_SPAN);
            if (spanNL == null) {
                return;
            }
            for (int i = 0; i < spanNL.size(); i++) {
                Node spanNode = spanNL.elementAt(i);
                NodeList aNL = Parser.createParser(spanNode.toHtml(), Constant.CHARSET_GB2312).parse(FILTER_DIV_A);
                if (aNL == null) {
                    continue;
                }
                for (int j = 0; j < aNL.size(); j++) {
                    LinkTag link = (LinkTag) aNL.elementAt(j);
                    String albumUrl = link.getLink();
                    System.out.println("==============" + albumUrl + (n++) + "==================");
                    if (albumUrl != null) {
                        AlbumInfoParser.parse(albumUrl);
                    }
                }
            }
        } catch (ParserException e) {
            // Best effort: report which page failed and skip it.
            System.err.println("Failed to parse result page: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Crawls the result pages of one channel by appending
     * {@code &startat=0}, {@code &startat=10}, ... up to
     * {@code &startat=500} to {@code url} and calling
     * {@link #childrenParse(String)} for each.
     *
     * @param url search URL of the channel, without the {@code startat} parameter
     */
    public static void pages(String url) {
        for (int pageIndex = 0; pageIndex <= LAST_PAGE_INDEX; pageIndex++) {
            String surl = url + "&startat=" + (PAGE_STEP * pageIndex);
            childrenParse(surl);
        }
    }

    /**
     * Entry point: crawls the channel index page.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        channelParse("http://www.56.com/w/show_channel.phtml");
    }
}
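// Code-viewer residue (keyboard shortcut help), not part of the program:
//   Copy code:         Ctrl + C
//   Search code:       Ctrl + F
//   Full-screen mode:  F11
//   Switch theme:      Ctrl + Shift + D
//   Show shortcuts:    ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -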