// File: index.java
// (Residue from the web code viewer this file was copied from: "Font size:" control label.)
package src;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.englishStemmer;
/**
 * Builds an inverted index over the {@code .doc} and {@code .txt} files that
 * sit directly inside the {@code test-files} directory.
 *
 * <p>Each file's text is split into tokens, every token is lower-cased and
 * reduced to its English stem, and the stem is mapped to the set of files it
 * occurs in. The finished map is written to a file named {@code index} in the
 * working directory, one line per stem, in the form
 * {@code stem:file1|file2|}.
 *
 * @author Administrator
 */
public class Index {
    /** Characters treated as token separators when splitting document text. */
    private static final String DELIMITERS =
        " ,.<>/?;:'[{]}\\|-_=+`~,《。》、?;:‘“”【{】}、|-——=+!@#$%^&*()\"\r\t\n";

    /** Directory whose direct children are indexed. */
    File directory = new File("test-files");

    /** Children of {@link #directory}; {@code null} if it cannot be listed. */
    File[] files = directory.listFiles();

    /** Stemmed keyword mapped to the files that contain it. */
    HashMap<String,TreeSet<File>> map = new HashMap<String, TreeSet<File>>();

    /**
     * Indexes every {@code .doc} and {@code .txt} file in {@link #files} and
     * then writes the index to disk. A file that cannot be read is reported on
     * standard output and skipped; the remaining files are still processed.
     */
    public void generateIndex(){
        // listFiles() yields null when the directory is missing or unreadable.
        if (files == null){
            System.out.println("Cannot list directory: " + directory.getPath());
            return;
        }
        for (int i = 0; i < files.length; i++){
            File file = files[i];
            try{
                if (hasExtension(file, ".doc")){
                    dealWithAFile(new StringTokenizer(readDoc(file), DELIMITERS), file);
                }
                else if (hasExtension(file, ".txt")){
                    dealWithAFile(new StringTokenizer(readTxt(file), DELIMITERS), file);
                }
            }catch(IOException e){
                System.out.println(e);
            }
        }
        storeInFile();
    }

    /**
     * Tells whether the file's name ends with the given extension, ignoring
     * case. Names shorter than the extension simply do not match.
     */
    private static boolean hasExtension(File file, String extension){
        String name = file.getName();
        // regionMatches returns false (rather than throwing) for a negative offset.
        return name.regionMatches(true, name.length() - extension.length(),
                extension, 0, extension.length());
    }

    /** Extracts the text of a Word document, always closing the input stream. */
    private String readDoc(File file) throws IOException {
        FileInputStream is = new FileInputStream(file);
        try{
            WordExtractor extractor = new WordExtractor(is);
            return extractor.getText();
        }finally{
            is.close();
        }
    }

    /** Reads a whole text file, always closing the reader. */
    private String readTxt(File file) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(file));
        try{
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null){
                // readLine() drops the line terminator; put a separator back so
                // the last word of one line is not glued to the first word of
                // the next ('\n' is one of the tokenizer delimiters).
                sb.append(line).append('\n');
            }
            return sb.toString();
        }finally{
            br.close();
        }
    }

    /**
     * Adds every token of one file to the index.
     *
     * @param text tokenizer over the file's text
     * @param file the file the tokens come from; recorded under each stem
     */
    public void dealWithAFile(StringTokenizer text, File file){
        // One stemmer serves the whole file; setCurrent() loads each new word.
        SnowballStemmer stemmer = new englishStemmer();
        while (text.hasMoreTokens()){
            // Fixed locale so lower-casing does not vary with the default locale.
            stemmer.setCurrent(text.nextToken().toLowerCase(Locale.ENGLISH));
            stemmer.stem();
            String token = stemmer.getCurrent();

            TreeSet<File> postings = map.get(token);
            if (postings == null){
                // First occurrence of this stem: create its file set.
                postings = new TreeSet<File>();
                map.put(token, postings);
            }
            postings.add(file);
        }
    }

    /**
     * Writes the index to the file {@code index}, one line per stem:
     * the stem, a colon, then each containing file's name followed by
     * {@code |}. I/O failures are reported on standard output.
     */
    private void storeInFile(){
        try{
            FileWriter index = new FileWriter("index");
            try{
                for (Entry<String, TreeSet<File>> entry : map.entrySet()){
                    index.append(entry.getKey() + ":");
                    for (File file : entry.getValue()){
                        // getName() strips the directory part on every platform,
                        // not only where the separator is a backslash.
                        index.append(file.getName() + "|");
                    }
                    index.append("\n");
                }
            }finally{
                index.close();
            }
        }catch(IOException e){
            System.out.println(e);
        }
    }

    /** Builds the index for the {@code test-files} directory. */
    public static void main(String[] args){
        Index i = new Index();
        i.generateIndex();
    }
}
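/*
 * Residue from the web code viewer this file was copied from
 * (keyboard-shortcut help, not part of the program):
 *   Copy code           Ctrl + C
 *   Search code         Ctrl + F
 *   Full-screen mode    F11
 *   Switch theme        Ctrl + Shift + D
 *   Show shortcuts      ?
 *   Increase font size  Ctrl + =
 *   Decrease font size  Ctrl + -
 */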