亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? vsm.java

?? 用TFIDF和特征增益兩種方式實現了特征向量空間的建立
?? JAVA
字號:
package yus.baseline;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;

/**
 * Builds feature vectors for a fixed corpus of text files in two ways: by an
 * information-gain style score and by TF-IDF.
 *
 * <p>
 * All statistics are kept in string-keyed maps. The key layout is:
 * <ul>
 * <li>{@code "<file>/<word>"} - occurrences of a word in one file, N(f,w)</li>
 * <li>{@code "total/<word>"} - occurrences of a word in all files, N(w)</li>
 * <li>{@code "totalWords/<file>"} - number of counted words in one file</li>
 * <li>{@code "totalWords/allFiles"} - number of counted words in all files</li>
 * </ul>
 * Score maps hold both {@code "<word>"} (sum over all files) and
 * {@code "<file>/<word>"} (contribution of a single file).
 */
public class Vsm {

	/** Number of corpus files; they are named "1.txt" through "100.txt". */
	private static final int FILE_COUNT = 100;

	/** Corpus file names, read from "&lt;working dir&gt;/text/desFile/". */
	final static String[] FILES = buildFileNames(FILE_COUNT);

	/** Stop-word list, one entry per line, resolved against the working directory. */
	final static String StopWordFile = "stop_words_ch.txt";

	/** Maximum number of top-ranked words kept as features. */
	final static int Dimensionality = 100;

	/** Placeholder score used when no per-file information-gain value exists. */
	final static double MIN = -9999.0;

	/**
	 * Collects the corpus statistics, runs both feature selections and prints
	 * the elapsed time. The selected feature arrays are currently not used
	 * any further.
	 *
	 * @param args ignored
	 * @throws IOException if the stop-word file or a corpus file cannot be read
	 */
	public static void main(String[] args) throws IOException {
		long startTime = System.currentTimeMillis();

		Map<String, Integer> wordInFileHM = new HashMap<String, Integer>(); // N(f,w)
		Map<String, Integer> wordHM = new HashMap<String, Integer>(); // N(w)
		// word totals per file and over all files
		Map<String, Integer> totalWordHM = new HashMap<String, Integer>();
		// information gain of each word
		Map<String, Double> valueHM = new HashMap<String, Double>();

		List<String> wordList = statistic(wordInFileHM, wordHM, totalWordHM,
				new ArrayList<String>());

		computePlusValue(valueHM, wordInFileHM, wordHM, totalWordHM, wordList);

		selectFeatureByIG(wordList, valueHM);

		selectFeatureByTFIDF(wordList, wordInFileHM, totalWordHM);

		long endTime = System.currentTimeMillis();
		System.out.println("The spending time is: " + (endTime - startTime)
				+ " ms");
	}

	/** Returns the names "1.txt" .. "&lt;count&gt;.txt". */
	private static String[] buildFileNames(int count) {
		String[] names = new String[count];
		for (int i = 0; i < count; i++) {
			names[i] = (i + 1) + ".txt";
		}
		return names;
	}

	/** Returns the counter stored under {@code key}, or 0 when there is none. */
	private static int count(Map<String, Integer> counts, String key) {
		Integer value = counts.get(key);
		return value == null ? 0 : value.intValue();
	}

	/**
	 * Ranks the words by their summed information-gain score and builds one
	 * feature vector per file from the best-ranked words.
	 *
	 * @param wordList candidate words
	 * @param valueHM  scores as filled by {@link #computePlusValue}
	 * @return {@code [file][feature]{word, score}}; the feature dimension is
	 *         {@code min(Dimensionality, wordList.size())}, and a missing
	 *         per-file score is reported as {@link #MIN}
	 */
	private static String[][][] selectFeatureByIG(List<String> wordList,
			Map<String, Double> valueHM) {
		String[] waitSelect = wordList.toArray(new String[wordList.size()]);
		px(waitSelect, valueHM);

		// Fewer candidates than Dimensionality must not overrun the array.
		int dims = Math.min(Dimensionality, waitSelect.length);
		String[][][] featurePlus = new String[FILES.length][dims][2];
		for (int m = 0; m < FILES.length; m++) {
			for (int n = 0; n < dims; n++) {
				Double value = valueHM.get(FILES[m] + "/" + waitSelect[n]);
				featurePlus[m][n][0] = waitSelect[n];
				featurePlus[m][n][1] = String.valueOf(value == null ? MIN
						: value.doubleValue());
			}
		}
		return featurePlus;
	}

	/**
	 * Ranks the words by their TF-IDF summed over all files and builds one
	 * feature vector per file from the best-ranked words.
	 *
	 * <p>
	 * The weight of a word in a file is
	 * {@code N(f,w) * log(totalWords/allFiles / nj)}, where {@code nj} is the
	 * number of files containing the word.
	 * NOTE(review): the numerator is the corpus word total, not the number of
	 * files as in the textbook IDF - kept as found, confirm it is intended.
	 *
	 * @param wordList     candidate words
	 * @param wordInFileHM per-file word counts
	 * @param totalWordHM  word totals
	 * @return {@code [file][feature]{word, weight}}; the feature dimension is
	 *         {@code min(Dimensionality, wordList.size())}, and the weight is
	 *         "0" when the file does not contain the word
	 */
	private static String[][][] selectFeatureByTFIDF(List<String> wordList,
			Map<String, Integer> wordInFileHM, Map<String, Integer> totalWordHM) {
		String[] waitSelect = wordList.toArray(new String[wordList.size()]);
		int fileLen = FILES.length;
		int wordLen = waitSelect.length;

		// nj[n]: number of files that contain waitSelect[n]
		int[] nj = new int[wordLen];
		for (int n = 0; n < wordLen; n++) {
			for (int m = 0; m < fileLen; m++) {
				if (wordInFileHM.get(FILES[m] + "/" + waitSelect[n]) != null) {
					nj[n]++;
				}
			}
		}

		Integer corpusTotal = totalWordHM.get("totalWords" + "/" + "allFiles");
		Map<String, Double> tfidfHM = new HashMap<String, Double>();
		for (int m = 0; m < fileLen; m++) {
			for (int n = 0; n < wordLen; n++) {
				String word = waitSelect[n];
				String fileKey = FILES[m] + "/" + word;
				Integer occurrences = wordInFileHM.get(fileKey);

				double subValue = 0;
				if (occurrences != null && corpusTotal != null) {
					subValue = occurrences.intValue()
							* Math.log(corpusTotal.doubleValue() / nj[n]);
				}

				Double sum = tfidfHM.get(word);
				tfidfHM.put(word, sum == null ? subValue : sum + subValue);
				tfidfHM.put(fileKey, subValue);
			}
		}

		// nj is indexed by the pre-sort order and must not be used below.
		px(waitSelect, tfidfHM);

		int dims = Math.min(Dimensionality, wordLen);
		String[][][] featureVSM = new String[fileLen][dims][2];
		for (int m = 0; m < fileLen; m++) {
			for (int n = 0; n < dims; n++) {
				String fileKey = FILES[m] + "/" + waitSelect[n];
				featureVSM[m][n][0] = waitSelect[n];
				if (wordInFileHM.get(fileKey) == null) {
					featureVSM[m][n][1] = String.valueOf(0);
				} else {
					// Per-file weight; the corpus-wide sum only drives the ranking.
					featureVSM[m][n][1] = String.valueOf(tfidfHM.get(fileKey));
				}
			}
		}

		return featureVSM;
	}

	/**
	 * Sorts the words in place by descending score ("px" abbreviates the
	 * pinyin for "sort"). Words with equal scores keep their relative order;
	 * words without a score sort last.
	 *
	 * @param waitSelect words to sort
	 * @param valueHM    score of each word
	 */
	private static void px(String[] waitSelect,
			final Map<String, Double> valueHM) {
		Arrays.sort(waitSelect, new Comparator<String>() {
			@Override
			public int compare(String left, String right) {
				// Arguments swapped to obtain descending order.
				return Double.compare(score(valueHM, right),
						score(valueHM, left));
			}
		});
	}

	/** Returns the score of {@code word}, or negative infinity when it has none. */
	private static double score(Map<String, Double> scores, String word) {
		Double value = scores.get(word);
		return value == null ? Double.NEGATIVE_INFINITY : value.doubleValue();
	}

	/**
	 * Computes the information-gain style score of every word.
	 *
	 * <p>
	 * For each file and word the contribution is
	 * {@code wf*log(wf/w*F) + nwf*log(nwf/nf*F)} with wf = N(f,w), w = N(w),
	 * nwf = words in the file that are not the word, nf = words in the corpus
	 * that are not the word, and F = number of files. A term whose factor is 0
	 * contributes 0. The contribution is stored under
	 * {@code "<file>/<word>"} and added to the sum stored under
	 * {@code "<word>"}.
	 *
	 * @param valueHM      receives the scores
	 * @param wordInFileHM per-file word counts
	 * @param wordHM       corpus-wide word counts
	 * @param totalWordHM  word totals
	 * @param wordList     words to score
	 */
	private static void computePlusValue(Map<String, Double> valueHM,
			Map<String, Integer> wordInFileHM, Map<String, Integer> wordHM,
			Map<String, Integer> totalWordHM, List<String> wordList) {
		int fileLen = FILES.length;
		double allWords = count(totalWordHM, "totalWords" + "/" + "allFiles");
		for (int i = 0; i < fileLen; i++) {
			// A file that contributed no words has no entry; treat it as 0.
			double fileWords = count(totalWordHM, "totalWords" + "/"
					+ FILES[i]);
			for (String word : wordList) {
				String fileKey = FILES[i] + "/" + word;
				double wf = count(wordInFileHM, fileKey);
				double w = count(wordHM, "total" + "/" + word);
				double nwf = fileWords - wf;
				double nf = allWords - w;
				double subValue = weightedLog(wf, w, fileLen)
						+ weightedLog(nwf, nf, fileLen);

				Double sum = valueHM.get(word);
				valueHM.put(word, sum == null ? subValue : sum + subValue);
				valueHM.put(fileKey, subValue);
			}
		}
	}

	/**
	 * Returns {@code part * log(part / whole * fileLen)}, or 0 when
	 * {@code part} is 0 so that {@code 0 * log(0)} does not yield NaN.
	 */
	private static double weightedLog(double part, double whole, int fileLen) {
		if (part == 0) {
			return 0;
		}
		return part * Math.log(part / whole * fileLen);
	}

	/**
	 * Reads every corpus file and counts its words.
	 *
	 * <p>
	 * Each whitespace-separated token is reduced to the text before its first
	 * "/" (presumably the token is "word/tag" - verify against the corpus),
	 * dropped if that text is a stop word, and stripped of leading symbols by
	 * {@link #delQJFH}.
	 * NOTE(review): files are decoded with the platform default charset while
	 * {@link #isSymbol} assumes GBK - confirm the corpus encoding.
	 *
	 * @param wordInFileHM receives the per-file word counts
	 * @param wordHM       receives the corpus-wide word counts
	 * @param totalWordHM  receives the word totals
	 * @param wordList     receives each distinct word once, in order of first
	 *                     appearance
	 * @return the words from {@code wordList} that pass {@link #delSmallWord}
	 * @throws IOException if the stop-word file or a corpus file cannot be read
	 */
	private static List<String> statistic(Map<String, Integer> wordInFileHM,
			Map<String, Integer> wordHM, Map<String, Integer> totalWordHM,
			List<String> wordList) throws IOException {
		Map<String, Integer> stopWordHM = new HashMap<String, Integer>();
		getStopWordTable(stopWordHM);

		// Constant-time membership test instead of scanning wordList per token.
		Set<String> seen = new HashSet<String>(wordList);

		String path = new File("").getAbsolutePath() + "/text/desFile/";
		for (int i = 0; i < FILES.length; i++) {
			BufferedReader br = new BufferedReader(new InputStreamReader(
					new FileInputStream(path + FILES[i])));
			try {
				String line;
				while ((line = br.readLine()) != null) {
					if ("".equals(line.trim())) {
						continue;
					}
					StringTokenizer tokenizer = new StringTokenizer(line);
					while (tokenizer.hasMoreTokens()) {
						String word = extractWord(tokenizer.nextToken(),
								stopWordHM);
						if ("".equals(word)) {
							continue;
						}
						addToDic(wordInFileHM, FILES[i], word);
						addToDic(wordHM, "total", word);
						addToDic(totalWordHM, "totalWords", FILES[i]);
						addToDic(totalWordHM, "totalWords", "allFiles");
						if (seen.add(word)) {
							wordList.add(word);
						}
					}
				}
			} finally {
				br.close();
			}
		}

		return delSmallWord(wordList, wordHM);
	}

	/**
	 * Reduces a raw token to the word to be counted.
	 *
	 * @param token      raw whitespace-separated token
	 * @param stopWordHM stop words (only the keys are used)
	 * @return the text before the first "/" with leading symbols and
	 *         surrounding whitespace removed, or "" when the token has no such
	 *         text, is a stop word, or consists of symbols only
	 * @throws UnsupportedEncodingException if the JVM lacks the GBK charset
	 */
	private static String extractWord(String token,
			Map<String, Integer> stopWordHM)
			throws UnsupportedEncodingException {
		// split() yields an empty array for tokens made only of "/".
		String[] parts = token.trim().split("/");
		if (parts.length == 0 || stopWordHM.get(parts[0]) != null) {
			return "";
		}
		// Trim once so that the maps and the word list agree on the key.
		return delQJFH(parts[0]).trim();
	}

	/**
	 * Keeps only the words that occur more than 10 times in the whole corpus.
	 *
	 * @param wordList candidate words
	 * @param wordHM   corpus-wide word counts
	 * @return a new list with the sufficiently frequent words, order preserved
	 */
	private static List<String> delSmallWord(List<String> wordList,
			Map<String, Integer> wordHM) {
		int threshold = 10;
		List<String> list = new ArrayList<String>();
		for (String word : wordList) {
			if (count(wordHM, "total/" + word) > threshold) {
				list.add(word);
			}
		}
		return list;
	}

	/**
	 * Removes leading characters for which {@link #isSymbol} is true
	 * (the name abbreviates the pinyin for "delete full-width symbols").
	 *
	 * @param s text to clean; may be null
	 * @return the remaining text, "" if nothing remains or {@code s} is null
	 * @throws UnsupportedEncodingException if the JVM lacks the GBK charset
	 */
	private static String delQJFH(String s) throws UnsupportedEncodingException {
		if (s == null) {
			return "";
		}
		String rest = s;
		while (rest.length() > 0 && isSymbol(rest)) {
			rest = rest.substring(1);
		}
		return rest;
	}

	/**
	 * Tells whether the GBK encoding of {@code s} starts with the byte 0xA1
	 * (-95 as a signed byte).
	 *
	 * @param s text to inspect
	 * @return true if the first GBK byte is 0xA1; false otherwise, including
	 *         for the empty string
	 * @throws UnsupportedEncodingException if the JVM lacks the GBK charset
	 */
	private static boolean isSymbol(String s)
			throws UnsupportedEncodingException {
		byte[] b = s.getBytes("GBK");
		return b.length > 0 && b[0] == -95;
	}

	/**
	 * Increments the counter stored under {@code file + "/" + word}, starting
	 * at 1 for a new key.
	 *
	 * @param hm   counter map to update
	 * @param file first key part
	 * @param word second key part
	 */
	private static void addToDic(Map<String, Integer> hm, String file,
			String word) {
		String s = file + "/" + word;
		Integer current = hm.get(s);
		hm.put(s, current == null ? 1 : current.intValue() + 1);
	}

	/**
	 * Loads the stop words from {@link #StopWordFile}, one per line. Each
	 * trimmed line becomes a key; the value is its zero-based line index.
	 *
	 * @param stopWordHM receives the stop words
	 * @throws IOException if the file cannot be read
	 */
	private static void getStopWordTable(Map<String, Integer> stopWordHM)
			throws IOException {
		BufferedReader br = new BufferedReader(new InputStreamReader(
				new FileInputStream(StopWordFile)));
		try {
			int i = 0;
			String line;
			while ((line = br.readLine()) != null) {
				stopWordHM.put(line.trim(), i++);
			}
		} finally {
			br.close();
		}
	}

}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
欧美日韩国产一区二区三区地区| 亚洲女性喷水在线观看一区| 国产精品国产三级国产专播品爱网| 亚洲香肠在线观看| 国产精品88av| 欧美一级二级在线观看| 亚洲男女毛片无遮挡| 国产精品羞羞答答xxdd| 日韩欧美电影在线| 亚洲成人免费在线观看| 97se亚洲国产综合自在线不卡| 久久综合色之久久综合| 日本不卡高清视频| 欧美丝袜丝交足nylons| 亚洲欧美另类在线| gogogo免费视频观看亚洲一| 国产亚洲成av人在线观看导航 | 韩国一区二区视频| 欧美日韩国产经典色站一区二区三区 | 一区二区三区中文在线| 不卡的电影网站| 亚洲国产成人私人影院tom| 99v久久综合狠狠综合久久| 精品区一区二区| 美日韩黄色大片| 91精品国产乱| 视频一区视频二区中文字幕| 欧美日免费三级在线| 亚洲综合清纯丝袜自拍| 在线视频国内自拍亚洲视频| 夜夜夜精品看看| 欧美性做爰猛烈叫床潮| 午夜精品国产更新| 欧美日韩高清影院| 日韩电影免费一区| 日韩精品一区在线| 韩国女主播一区| 亚洲国产精品99久久久久久久久| 国产999精品久久| 1区2区3区国产精品| 色先锋资源久久综合| 亚洲午夜精品网| 日韩视频免费观看高清完整版在线观看 | 欧美一区三区二区| 麻豆成人综合网| 久久久噜噜噜久久中文字幕色伊伊| 国产精品综合网| 国产精品久久久久影院| 91麻豆123| 亚洲mv大片欧洲mv大片精品| 欧美一区二区三区电影| 国产一区高清在线| 亚洲欧洲成人精品av97| 欧美揉bbbbb揉bbbbb| 极品美女销魂一区二区三区免费| 国产欧美一区二区三区网站| 色综合久久久网| 日日夜夜精品视频免费| 久久人人97超碰com| 91免费国产在线| 天天综合色天天综合| 久久久国际精品| 在线看日韩精品电影| 激情综合色综合久久| 自拍偷拍亚洲激情| 欧美一区二区三区播放老司机| 国产成人在线看| 午夜在线成人av| 久久久国产午夜精品| 在线免费观看成人短视频| 国产一区二区三区免费观看| 亚洲欧美日韩久久精品| 精品国产乱码久久久久久久| 91色porny| 国产一区二区三区黄视频| 一区二区三区视频在线看| 久久免费的精品国产v∧| 欧美精品一区二区久久婷婷 | 日本道免费精品一区二区三区| 欧美a级一区二区| 日韩毛片视频在线看| 欧美一卡二卡在线| 91黄色在线观看| 风间由美一区二区三区在线观看 | 另类欧美日韩国产在线| 亚洲三级免费观看| 久久久久国产精品免费免费搜索| 欧美精品日韩一本| 色婷婷狠狠综合| 不卡高清视频专区| 国产成人午夜高潮毛片| 免费在线观看日韩欧美| 亚洲国产色一区| 亚洲欧美日本在线| 国产精品婷婷午夜在线观看| 欧美va亚洲va| 日韩一级大片在线| 欧美另类变人与禽xxxxx| 日本久久一区二区三区| 国产91清纯白嫩初高中在线观看| 久久国产尿小便嘘嘘尿| 舔着乳尖日韩一区| 亚洲在线一区二区三区| 亚洲伦理在线免费看| 中文字幕一区二区三区色视频 | 国产99久久久精品| 国产毛片精品视频| 国内精品在线播放| 精品一区二区三区在线观看| 日韩成人一级大片| 日本美女一区二区三区视频| 日韩激情中文字幕| 日本特黄久久久高潮| 秋霞电影网一区二区| 日韩影院在线观看| 日本视频在线一区| 日本不卡视频在线观看| 麻豆精品一区二区综合av| 日韩国产在线观看一区| 热久久免费视频| 一本大道久久a久久精品综合| 91首页免费视频| 欧美熟乱第一页| 91精品国产乱| 久久久亚洲高清| 国产精品毛片久久久久久| 亚洲同性gay激情无套| 亚洲综合在线第一页| 午夜精品一区二区三区免费视频| 日本aⅴ免费视频一区二区三区| 日本欧洲一区二区| 韩国v欧美v日本v亚洲v| 成人av电影免费观看| 在线看国产日韩| 欧美一区二区视频网站| 国产亚洲精久久久久久| 亚洲桃色在线一区| 日韩福利视频网| 国产精品一级在线| 色呦呦日韩精品| 日韩午夜电影在线观看| 国产日产亚洲精品系列| 亚洲一区在线观看网站| 久久99最新地址| 99久久免费视频.com| 在线播放一区二区三区| 久久九九国产精品| 亚洲综合在线电影| 精品写真视频在线观看| 91免费看`日韩一区二区| 在线综合+亚洲+欧美中文字幕| 国产偷国产偷精品高清尤物| 亚洲综合一区在线| 国产传媒一区在线| 欧美视频三区在线播放| 久久精品这里都是精品| 亚洲夂夂婷婷色拍ww47| 国产精品性做久久久久久| 欧美性一二三区| 国产女同性恋一区二区| 日韩精品福利网| 一本到三区不卡视频| 久久嫩草精品久久久精品| 午夜精品久久久久久久99水蜜桃| 国产成人免费av在线| 欧美片网站yy| 亚洲欧美乱综合| 
国产成a人亚洲| 欧美成人午夜电影| 亚洲成人免费观看| 91丨porny丨蝌蚪视频| 国产午夜精品福利| 九九**精品视频免费播放| 欧美视频你懂的| 日韩 欧美一区二区三区| 成人av在线影院| 26uuu欧美日本| 秋霞影院一区二区| 欧美日韩免费在线视频| 亚洲欧洲www| av不卡免费在线观看| 久久精品视频一区二区| 久久99国产乱子伦精品免费| 欧美美女喷水视频| 亚洲电影中文字幕在线观看| 色哟哟日韩精品| 亚洲人被黑人高潮完整版| 成人涩涩免费视频| 久久久精品综合| 国产成人精品一区二| 久久综合狠狠综合| 极品销魂美女一区二区三区| 日韩一区二区三区三四区视频在线观看 | 五月激情丁香一区二区三区| 在线欧美日韩精品| 亚洲精品高清在线| 色拍拍在线精品视频8848| 亚洲另类在线制服丝袜| 色狠狠av一区二区三区| 一区二区欧美精品|