// getcmireference.java
// 字號:
package cn.ac.cintcm.spider.cmi;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.commons.httpclient.NameValuePair;
import org.htmlparser.tags.InputTag;
import cn.ac.cintcm.spider.ConfigUtil;
import cn.ac.cintcm.spider.FormParameter;
import cn.ac.cintcm.spider.Get2FormContent;
import cn.ac.cintcm.spider.GetFormContent;
/**
 * Command-line job that submits one form query per configured journal name
 * ({@code qkmc}) and year ({@code nian}), appends every raw response to a text
 * file and hands the parsed response to {@link MSCmiReference#addRecords}.
 *
 * <p>The job configuration is loaded through {@link ConfigUtil#loadJobConfig};
 * the keys read here are {@code qkmc}, {@code nian}, {@code url} and
 * {@code url2}. The two list keys are comma separated.
 *
 * <p>NOTE(review): {@code url} and {@code url2} are static and are overwritten
 * by the config-file constructor, so they are effectively process-wide
 * settings shared by every instance.
 */
public class GetCmiReference extends Get2FormContent {

    /** Record count used when the page does not expose a usable number. */
    private static final int DEFAULT_RECORD_COUNT = 10;

    private static String url = "http://cnki.cintcm.ac.cn:1012/cdweb/page/kanmindex.cbs";
    private static String url2 = "http://cnki.cintcm.ac.cn:1012/cdweb/page/form_download.cbs";

    /** Names passed to {@code FormParameter.setHiddenParameters} for every request. */
    private static final List<String> hidden =
            new ArrayList<String>(Arrays.asList(new String[] {"result", "Sortresult", "viewstr", "qx"}));

    /** Journal names to query (config key {@code qkmc}). */
    private List<String> qikan = new ArrayList<String>();
    /** Years to query (config key {@code nian}). */
    private List<String> year = new ArrayList<String>();
    private BufferedWriter out;
    private MSCmiReference mr;
    private ParseCmiReference pc;

    /**
     * Creates the driver instance and reads the job configuration.
     *
     * <p>Missing or blank {@code url}/{@code url2} entries leave the built-in
     * defaults in place instead of replacing them with {@code null}.
     *
     * @param configFile name of the job configuration file
     * @throws IOException declared for the configuration load and the superclass constructor
     */
    public GetCmiReference(String configFile) throws IOException {
        super("");
        Map<?, ?> config = ConfigUtil.loadJobConfig(configFile);
        if (config != null) {
            qikan.addAll(splitList((String) config.get("qkmc")));
            year.addAll(splitList((String) config.get("nian")));

            String configuredUrl = (String) config.get("url");
            if (hasText(configuredUrl)) {
                url = configuredUrl;
            }
            String configuredUrl2 = (String) config.get("url2");
            if (hasText(configuredUrl2)) {
                url2 = configuredUrl2;
            }
        }
    }

    /**
     * Creates a per-request instance around already prepared form parameters.
     *
     * @param formParameters parameters of the single request to perform
     */
    public GetCmiReference(FormParameter formParameters) {
        super(formParameters);
    }

    /** Intentionally empty: this job adds no hand-written name/value pairs. */
    @Override
    public void createHandPairValues()
            throws UnsupportedEncodingException {
    }

    /**
     * Asks for the output database file name on standard input (falling back
     * to {@code web.mdb}) and runs the job described by {@code cmijob.txt},
     * writing the raw responses to {@code out.txt}.
     *
     * @param args ignored
     * @throws IOException kept for signature compatibility; I/O failures are reported here
     */
    public static void main(String[] args) throws IOException {
        try {
            String outDb = "web.mdb";
            String userInput = getInputParameter();
            if (hasText(userInput)) {
                // Surrounding whitespace typed at the prompt is not part of the file name.
                outDb = userInput.trim();
            }
            (new GetCmiReference("cmijob.txt")).process(outDb, "out.txt");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prompts for and reads the Access file name from standard input.
     *
     * @return the line entered, or {@code null} if nothing could be read
     */
    private static String getInputParameter() {
        System.out.println("請先準備好一個空的保存抓取結果的Access文件后輸入該文件名:\n");
        String parameter = null;
        try {
            // Deliberately not closed: closing the reader would close System.in.
            BufferedReader inStream = new BufferedReader(
                    new InputStreamReader(System.in)
            );
            parameter = inStream.readLine();
        } catch (IOException e) {
            System.out.println("IOException: " + e);
        }
        return parameter;
    }

    /**
     * Runs one request per (journal, year) pair, appends each response to
     * {@code outFile} in the working directory and adds the parsed response
     * to the database wrapper. The table is dropped and re-created first.
     *
     * <p>The output writer is closed even when a request, the parser or the
     * database call fails.
     *
     * @param msaccess name passed to the {@link MSCmiReference} constructor
     * @param outFile  name of the text file (relative to {@code user.dir}) receiving the raw responses
     * @throws IOException            if writing the output file or reading a response fails
     * @throws ClassNotFoundException declared by the database helper
     * @throws SQLException           declared by the database helper
     */
    public void process(String msaccess, String outFile) throws IOException, ClassNotFoundException, SQLException {
        mr = new MSCmiReference(msaccess);
        mr.dropTable();
        mr.createTable();
        pc = new ParseCmiReference();
        File outputFile = new File(System.getProperty("user.dir"), outFile);
        out = new BufferedWriter(new FileWriter(outputFile));
        try {
            System.out.println("開始抓取...");
            for (String qk : qikan) {
                // Re-reads the platform-encoded bytes as ISO-8859-1 characters.
                // NOTE(review): presumably so the HTTP layer sends the original
                // bytes unchanged; depends on the platform default charset - confirm.
                String km = new String(qk.getBytes(), "8859_1");
                NameValuePair kmValue = new NameValuePair("qkmc", km);
                NameValuePair yinwenValue = new NameValuePair("db", "yinwen");
                for (String y : year) {
                    FormParameter params = newFormParameter();
                    List<NameValuePair> values = new ArrayList<NameValuePair>();
                    values.add(kmValue);
                    values.add(yinwenValue);
                    values.add(new NameValuePair("ni", y));
                    params.setNameValues(values);

                    InputStream result = (new GetCmiReference(params)).getContent();
                    System.out.println(qk + "-" + y + "完成.");
                    String str;
                    try {
                        str = GetFormContent.slurp(result);
                    } finally {
                        // Not visible here whether slurp closes the stream;
                        // closing an already closed stream has no effect.
                        if (result != null) {
                            result.close();
                        }
                    }
                    out.write(str);
                    mr.addRecords(pc.parse(str));
                }
            }
        } finally {
            out.close();
        }
        System.out.println("全部抓取完成.");
    }

    /**
     * Builds the request parameters shared by every query: both URLs and the
     * hidden parameter names.
     *
     * @return a new parameter object without name/value pairs
     */
    private static FormParameter newFormParameter() {
        FormParameter params = new FormParameter();
        params.setUrl(url);
        params.setUrl2(url2);
        // Each request gets its own copy so the shared constant cannot be modified through it.
        params.setHiddenParameters(new ArrayList<String>(hidden));
        return params;
    }

    /**
     * For the input tag named {@code qx}, adds a {@code strCheckvalue} pair of
     * the form {@code ",1,2,...,n,"} where {@code n} is the number found
     * between the first and last single quote of the tag's {@code onclick}
     * attribute. Tags with any other (or no) name are ignored.
     *
     * @param tag input tag being processed
     */
    @Override
    protected void createExtraPairValues(InputTag tag) {
        // Constant-first comparison: a tag without a name attribute is simply skipped.
        if (!"qx".equals(tag.getAttribute("name"))) {
            return;
        }
        int recordsNum = parseRecordCount(tag.getAttribute("onclick"));
        StringBuilder xt = new StringBuilder(",");
        for (int j = 1; j <= recordsNum; j++) {
            xt.append(j);
            xt.append(",");
        }
        formParameters.getNameValues().add(new NameValuePair("strCheckvalue", xt.toString()));
    }

    /**
     * Extracts the integer between the first and last single quote of an
     * {@code onclick} value.
     *
     * @param onclick attribute value, may be {@code null}
     * @return the parsed number, or {@link #DEFAULT_RECORD_COUNT} when the
     *         attribute is missing, has no quoted section or is not numeric
     */
    private static int parseRecordCount(String onclick) {
        if (onclick == null) {
            return DEFAULT_RECORD_COUNT;
        }
        int open = onclick.indexOf('\'');
        int close = onclick.lastIndexOf('\'');
        if (open < 0 || close <= open) {
            return DEFAULT_RECORD_COUNT;
        }
        try {
            return Integer.parseInt(onclick.substring(open + 1, close).trim());
        } catch (NumberFormatException e) {
            e.printStackTrace();
            return DEFAULT_RECORD_COUNT;
        }
    }

    /** Splits a comma separated value into trimmed, non-empty items; {@code null} yields an empty list. */
    private static List<String> splitList(String csv) {
        List<String> items = new ArrayList<String>();
        if (csv == null) {
            return items;
        }
        for (String item : csv.split(",")) {
            String trimmed = item.trim();
            if (trimmed.length() > 0) {
                items.add(trimmed);
            }
        }
        return items;
    }

    /** Returns whether {@code value} is non-null and contains a non-whitespace character. */
    private static boolean hasText(String value) {
        return value != null && value.trim().length() > 0;
    }
}
// 快捷鍵說明
// 復制代碼
// Ctrl + C
// 搜索代碼
// Ctrl + F
// 全屏模式
// F11
// 切換主題
// Ctrl + Shift + D
// 顯示快捷鍵
// ?
// 增大字號
// Ctrl + =
// 減小字號
// Ctrl + -