// getrealcount.java
// Font size:
package com.blogool.crawl;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.flytinge.ContentHandle;
import org.flytinge.HttpGet;
import com.blogool.crawl.lib.Cat;
public class GetRealCount {
/**
 * Entry point: loads the category tree, registers a shutdown hook that saves it,
 * and starts one {@code HttpGet} per second-level category.
 *
 * <p>The tree is read from {@code d:/libox1/cats.xml}; the {@link ExitThread} hook
 * is given {@code d:/libox1/cats1.xml} as its output file. Only the children of
 * the root's direct children are visited.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final Cat root = Util.loadCat(new File("d:/libox1/cats.xml"));
    // Register the exit hook so the tree is saved when the JVM shuts down.
    Runtime.getRuntime().addShutdownHook(new ExitThread(root, new File("d:/libox1/cats1.xml")));
    for (Cat parent : root.getCats()) {
        for (Cat child : parent.getCats()) {
            System.out.println("handle cat:" + child.getCatName());
            // NOTE(review): "</html>" is presumably the end-of-page marker HttpGet
            // waits for before invoking the handler — confirm against HttpGet.
            new HttpGet(child.getUrl(), "</html>", new HandleGetRealCount(child)).start();
        }
    }
}
/**
 * Content handler that fetches the real product count and records the popular
 * products for one {@link Cat}, based on the text of its listing page.
 *
 * <p>The page text is not retained after {@link #handle(String)} returns.
 */
static class HandleGetRealCount implements ContentHandle {
    /** Matches the "A - B of N products" banner; group 1 is N, the total count. */
    private static final Pattern SIZE_PATTERN = Pattern.compile("<div class=\"sea_r_part3_left text_bold\">\\d+ \\- \\d+ of (\\d+) products</div>");
    /** Matches a {@code product_list_single} div and the anchor after it; group 1 is the anchor's href. */
    private static final Pattern POPULAR_PATTERN = Pattern.compile("<div id=\"product_list_single\".+?>\\s*<a href=\"(.+?)\".+?>");
    /** Sentinel from {@link #parseSize(String)}: no usable count (a real count is never negative). */
    private static final int NO_SIZE = -1;

    private final Cat cat;

    /**
     * @param c the category this handler updates
     */
    public HandleGetRealCount(Cat c) {
        this.cat = c;
    }

    /**
     * Updates the category's size and popular ids from the given page text.
     *
     * <p>If the product-count banner is missing, or its number does not fit in an
     * {@code int}, an error line is printed and the size is set to 1. Popular ids
     * are only set when at least one match is found.
     *
     * @param content the page text to scan
     */
    public void handle(String content) {
        // Correct the count.
        int total = parseSize(content);
        if (total != NO_SIZE) {
            cat.setSize(total);
            // NOTE(review): re-sets the URL to its current value; kept because
            // setUrl may have side effects not visible here — confirm or remove.
            cat.setUrl(cat.getUrl());
        } else {
            System.out.println("Cat" + cat.getCatName() + "\t\t\t:::error::: use default: 1");
            cat.setSize(1);
        }
        // Parse the popular products.
        String ids = collectPopularIds(content);
        if (ids.length() > 0) {
            cat.setPopularIds(ids);
        }
    }

    /** Returns the total from the first count banner, or {@link #NO_SIZE} if absent or too large for an int. */
    private static int parseSize(String content) {
        Matcher m = SIZE_PATTERN.matcher(content);
        if (!m.find()) {
            return NO_SIZE;
        }
        try {
            return Integer.parseInt(m.group(1));
        } catch (NumberFormatException overflow) {
            // The group is digits only, so the only failure is a value beyond
            // Integer.MAX_VALUE; treat it like a missing banner instead of
            // letting the exception escape the handler.
            return NO_SIZE;
        }
    }

    /** Concatenates every captured href, each followed by a comma; empty when nothing matches. */
    private static String collectPopularIds(String content) {
        StringBuilder sb = new StringBuilder();
        Matcher m = POPULAR_PATTERN.matcher(content);
        while (m.find()) {
            sb.append(m.group(1)).append(",");
        }
        return sb.toString();
    }
}
/**
 * Thread that saves the category tree before exit: its {@link #run()} writes the
 * given root to the given file via {@code Util.saveCat}.
 */
public static class ExitThread extends Thread {
    private final Cat tree;
    private final File target;

    /**
     * @param cat root of the category tree to save
     * @param f   file the tree is saved to
     */
    public ExitThread(Cat cat, File f) {
        tree = cat;
        target = f;
    }

    /** Saves the tree to the target file. */
    @Override
    public void run() {
        Util.saveCat(tree, target);
    }
}
}
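// Keyboard shortcuts
// Copy code
// Ctrl + C
// Search code
// Ctrl + F
// Full-screen mode
// F11
// Switch theme
// Ctrl + Shift + D
// Show shortcuts
// ?
// Increase font size
// Ctrl + =
// Decrease font size
// Ctrl + -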