// parseitemcontenthandle.java
// Font size:
package com.blogool.crawl;
import java.util.regex.*;
import org.flytinge.ContentHandle;
import com.blogool.crawl.lib.*;
/**
 * Content handler that scans a page's markup for item thumbnail blocks and
 * appends one {@code Item} per block to the {@code Cat} supplied at construction.
 *
 * <p>Additions to the category's item collection are made while holding the
 * category's monitor, so several handlers may share one {@code Cat} provided
 * every other writer locks on the same object (not verifiable from this file).
 */
public class ParseItemContentHandle implements ContentHandle {
    /**
     * Matches one {@code <div class="sea_r_part4_left">} block wrapping a linked
     * 60x60 thumbnail image.
     *
     * <ul>
     *   <li>group 1: the anchor's {@code href}</li>
     *   <li>group 2: the image's {@code src}</li>
     *   <li>group 3: the image's {@code alt} text (captured but not used here)</li>
     * </ul>
     *
     * <p>Compiled once for the class: a {@link Pattern} is immutable and may be
     * shared freely. The field keeps its original name and package-private
     * access so existing in-package references continue to compile.
     */
    static final Pattern pItem = Pattern.compile("<div\\s+class=\"sea_r_part4_left\">\\s*<a\\s*href=\"(.+?)\".+?><img src=\"(.+?)\" border=\"\\d*\"\\s*alt=\"(.+?)\"\\s*width=\"60\"\\s*height=\"60\"\\s*\\/><\\/a>\\s*<\\/div>");

    /** Category whose item collection receives every parsed item. */
    private final Cat cat;

    /**
     * Creates a handler that stores parsed items on the given category.
     *
     * @param cat category to receive the items; it is also used as the lock
     *            object when items are added
     */
    public ParseItemContentHandle(Cat cat) {
        this.cat = cat;
    }

    /**
     * Extracts every item block from {@code content} and adds it to the category.
     *
     * <p>Each match is processed independently: a failure while building or
     * storing one item is reported on standard error and does not prevent the
     * remaining matches from being processed.
     *
     * @param content page markup to scan; {@code null} or empty content is
     *                treated as containing no items
     */
    public void handle(String content) {
        // Pattern.matcher(null) throws; a missing page simply contributes nothing.
        if (content == null || content.isEmpty()) {
            return;
        }

        Matcher m = pItem.matcher(content);
        while (m.find()) {
            try {
                Item item = new Item();
                item.setUrl(m.group(1));
                item.setImageUrls(new String[] {m.group(2)});
                // Guard the shared collection for the shortest possible time.
                synchronized (this.cat) {
                    this.cat.getItems().add(item);
                }
            } catch (Exception e) {
                // Best effort: report the bad item and keep scanning the page.
                e.printStackTrace();
            }
        }
    }
}
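/*
 * Keyboard shortcuts (code-viewer page text, not part of the program)
 * Copy code
 * Ctrl + C
 * Search code
 * Ctrl + F
 * Full-screen mode
 * F11
 * Toggle theme
 * Ctrl + Shift + D
 * Show shortcuts
 * ?
 * Increase font size
 * Ctrl + =
 * Decrease font size
 * Ctrl + -
 */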