?? downloadimage.java
字號:
package com.blogool.crawl;
import java.io.*;
import java.util.*;
import org.flytinge.HttpDataGetItem;
import org.flytinge.HttpDataListGet;
import org.flytinge.HttpListGet;
import org.flytinge.HttpListGetItem;
import org.flytinge.SuperContentHandle;
import org.flytinge.SuperDataHandle;
import com.blogool.crawl.lib.*;
/**
 * Command-line tool that walks a category catalog, builds one HTTP download
 * request per product image that is not yet present in the output directory,
 * and hands the requests to {@link HttpDataListGet}.
 *
 * <p>Image URLs are rewritten by {@link #changeUrl(String)} before they are
 * requested. Downloaded bytes are stored by {@link DownloadImageHandle}.
 */
public class DownloadImage {

    /**
     * Proxy endpoints in {@code host:port} form. Proxy use is currently
     * disabled (the {@code setProxyList} call in
     * {@link #startDownloads(List)} is commented out), so this list is not
     * read at runtime; it is kept so the endpoints are not lost.
     */
    private static final String[] PROXY_ADDRESSES = {
        "128.208.4.199:3124",
        "132.239.17.226:3124",
        "35.9.27.27:3124",
        "128.2.223.65:3128",
        "128.8.126.111:3127",
        "128.8.126.112:3124",
        "128.10.19.52:3124",
        "141.213.4.201:3124",
        "141.213.4.202:3124",
        "155.225.2.72:3128",
        "128.193.33.8:3127",
        "198.82.160.220:3124",
    };

    /**
     * Queues every image URL of every item found in the second-level
     * categories of {@code d:/libox1/cats4.xml} and starts the download.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        List<HttpDataGetItem> pending =
                collectPendingDownloads(new File("d:/libox1/cats4.xml"), false);
        System.out.println(pending.size());
        startDownloads(pending);
    }

    /**
     * Older entry point: queues only the first image URL of each item found
     * in the second-level categories of {@code d:/libox1/cats2.xml}, skipping
     * items whose own URL is {@code null}, and starts the download.
     *
     * @param args ignored
     */
    public static void main1(String[] args) {
        List<HttpDataGetItem> pending =
                collectPendingDownloads(new File("d:/libox1/cats2.xml"), true);
        startDownloads(pending);
    }

    /**
     * Rewrites an image URL by replacing the path segment {@code /images/s/}
     * with {@code /images/l/} (presumably small to large variant - confirm
     * against the crawled site).
     *
     * @param url the image URL; must not be {@code null}
     * @return the URL with every {@code /images/s/} replaced by {@code /images/l/}
     * @throws NullPointerException if {@code url} is {@code null}
     */
    public static String changeUrl(String url) {
        // The pattern is a plain literal, so the non-regex replace() yields the
        // same result as replaceAll() without compiling a regular expression.
        return url.replace("/images/s/", "/images/l/");
    }

    /**
     * Loads the catalog and builds the list of download requests for images
     * that are neither on disk nor already queued during this call.
     *
     * @param catalog        catalog XML file passed to {@code Util.loadCat}
     * @param firstImageOnly {@code true} for the {@link #main1} behaviour
     *                       (first image only, items without a URL skipped);
     *                       {@code false} to queue all image URLs of an item
     * @return the requests to execute, in catalog order
     */
    private static List<HttpDataGetItem> collectPendingDownloads(File catalog, boolean firstImageOnly) {
        Cat root = Util.loadCat(catalog);
        List<HttpDataGetItem> pending = new ArrayList<HttpDataGetItem>();
        DownloadImageHandle handle = new DownloadImageHandle();
        Set<String> knownNames = existingFileNames(DownloadImageHandle.OUTPUT_PATH);

        if (root.getCats() == null) {
            return pending;
        }
        for (int i = 0; i < root.getCats().size(); i++) {
            Cat top = root.getCats().get(i);
            // Defensive: treat a missing sub-category list like an empty one,
            // the same way a missing item list is treated below.
            if (top.getCats() == null) {
                continue;
            }
            for (int j = 0; j < top.getCats().size(); j++) {
                Cat leaf = top.getCats().get(j);
                List<Item> items = leaf.getItems();
                if (items == null) {
                    continue;
                }
                for (int k = 0; k < items.size(); k++) {
                    queueItemImages(items.get(k), firstImageOnly, knownNames, handle, pending);
                }
            }
        }
        return pending;
    }

    /**
     * Appends download requests for one item's images to {@code pending}.
     *
     * @param item           catalog item whose image URLs are examined
     * @param firstImageOnly see {@link #collectPendingDownloads(File, boolean)}
     * @param knownNames     file names already on disk or already queued;
     *                       updated with every name queued here
     * @param handle         handler that will store the downloaded bytes
     * @param pending        output list receiving the new requests
     */
    private static void queueItemImages(Item item, boolean firstImageOnly, Set<String> knownNames,
            DownloadImageHandle handle, List<HttpDataGetItem> pending) {
        String[] urls = item.getImageUrls();
        if (urls == null || urls.length == 0) {
            return;
        }
        if (firstImageOnly && item.getUrl() == null) {
            return;
        }
        int limit = firstImageOnly ? 1 : urls.length;
        for (int n = 0; n < limit; n++) {
            String url = urls[n];
            if (url == null) {
                continue;
            }
            String fileName = Util.getImageFileName(url);
            // add() returns false when the name is already known, so the same
            // file is neither re-downloaded nor queued twice within one run.
            if (!knownNames.add(fileName)) {
                continue;
            }
            HttpDataGetItem request = new HttpDataGetItem();
            request.setEnds(null);
            request.setHandle(handle);
            request.setUrl(changeUrl(url));
            pending.add(request);
        }
    }

    /**
     * Returns the names of the entries currently in {@code dir}. Presence is
     * judged by name only; file size and content are not checked.
     *
     * @param dir directory to list
     * @return a mutable set of entry names; empty if {@code dir} cannot be listed
     */
    private static Set<String> existingFileNames(File dir) {
        Set<String> names = new HashSet<String>();
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when dir is not a directory or an I/O error occurs.
            System.err.println("Cannot list " + dir + "; assuming no images have been downloaded yet");
            return names;
        }
        for (int i = 0; i < entries.length; i++) {
            names.add(entries[i].getName());
        }
        return names;
    }

    /**
     * Prints the number of queued requests and starts the downloader.
     *
     * @param pending requests to execute
     */
    private static void startDownloads(List<HttpDataGetItem> pending) {
        System.out.println("All Item count:" + pending.size());
        HttpDataListGet hlg = new HttpDataListGet(pending);
        // Proxy support is disabled. To re-enable it, split PROXY_ADDRESSES
        // into host and port lists and pass them here:
        //hlg.setProxyList(proxies, ports);
        hlg.start();
    }

    /**
     * Download callback that writes each received image into
     * {@code d:/libox1/imagesl_bak}, named by {@code Util.getImageFileName}
     * applied to the request URL.
     */
    public static class DownloadImageHandle implements SuperDataHandle {
        private static final File OUTPUT_PATH = new File("d:/libox1/imagesl_bak");

        static {
            try {
                // mkdirs() also returns false when the directory already exists,
                // so only report a failure if it is still missing afterwards.
                if (!OUTPUT_PATH.mkdirs() && !OUTPUT_PATH.isDirectory()) {
                    System.err.println("Could not create image output directory " + OUTPUT_PATH);
                }
            } catch (SecurityException e) {
                e.printStackTrace();
            }
        }

        /**
         * Stores the downloaded bytes in the output directory. Failures are
         * reported on standard error and otherwise ignored so that one bad
         * image does not abort the remaining downloads.
         *
         * @param item  the request that produced {@code bytes}
         * @param bytes the downloaded image data
         */
        public void handle(HttpDataGetItem item, byte[] bytes) {
            try {
                String name = Util.getImageFileName(item.getUrl());
                Util.saveData(bytes, new File(OUTPUT_PATH, name));
            } catch (Exception e) {
                System.err.println("Failed to save image from " + (item == null ? null : item.getUrl()));
                e.printStackTrace();
            }
        }
    }
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -