亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? spamfilter.cs

?? this source about naive bayes program and nice program
?? CS
字號:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text.RegularExpressions;

namespace Expat.Bayesian
{
	/// <summary>
	/// Naive Baysiam Spam Filter.  Basically, an implementation of this:
	/// http://www.paulgraham.com/spam.html
	/// </summary>
	/// <summary>
	/// Naive Bayesian Spam Filter.  Basically, an implementation of Paul Graham's
	/// "A Plan for Spam": http://www.paulgraham.com/spam.html
	/// </summary>
	public class SpamFilter
	{
		#region knobs for dialing in performance
		/// <summary>
		/// These are constants used in the Bayesian algorithm, presented in a form that lets you monkey with them.
		/// </summary>
		public class KnobList
		{
														// Values in PG's original article:
			public int GoodTokenWeight = 2;				// 2
			public int MinTokenCount = 0;				// 0
			public int MinCountForInclusion = 5;		// 5
			public double MinScore = 0.011;				// 0.01
			public double MaxScore = 0.99;				// 0.99
			public double LikelySpamScore = 0.9998;		// 0.9998
			public double CertainSpamScore = 0.9999;	// 0.9999
			public int CertainSpamCount = 10;			// 10
			public int InterestingWordCount = 15;		// 15 (later changed to 20)
		}

		private KnobList _knobs = new KnobList();

		/// <summary>
		/// These are the knobs you can turn to dial in performance on the algorithm.
		/// Hopefully the names make a little bit of sense and you can find where
		/// they fit into the original algorithm.
		/// </summary>
		public KnobList Knobs
		{
			get { return _knobs; }
			set { _knobs = value; }
		}

		#endregion

		private Corpus _good;								// tokens harvested from non-spam text
		private Corpus _bad;								// tokens harvested from spam text
		private SortedDictionary<string, double> _prob;		// token -> probability the token indicates spam
		private int _ngood;									// entry count of the good corpus (see Corpus.Tokens)
		private int _nbad;									// entry count of the bad corpus

		#region properties
		/// <summary>
		/// A list of words that tend to show up in Spam text
		/// </summary>
		public Corpus Bad
		{
			get { return _bad; }
			set { _bad = value; }
		}

		/// <summary>
		/// A list of words that tend to show up in non-spam text
		/// </summary>
		public Corpus Good
		{
			get { return _good; }
			set { _good = value; }
		}

		/// <summary>
		/// A list of probabilities that the given word might appear in a Spam text
		/// </summary>
		public SortedDictionary<string, double> Prob
		{
			get { return _prob; }
			set { _prob = value; }
		}
		#endregion

		#region population

		/// <summary>
		/// Initialize the SpamFilter based on the supplied text.
		/// </summary>
		/// <param name="goodReader">Reader over known non-spam text</param>
		/// <param name="badReader">Reader over known spam text</param>
		public void Load(TextReader goodReader, TextReader badReader)
		{
			_good = new Corpus(goodReader);
			_bad = new Corpus(badReader);

			CalculateProbabilities();
		}

		/// <summary>
		/// Initialize the SpamFilter based on the contents of the supplied Corpuses.
		/// </summary>
		/// <param name="good">Corpus of known non-spam tokens</param>
		/// <param name="bad">Corpus of known spam tokens</param>
		public void Load(Corpus good, Corpus bad)
		{
			_good = good;
			_bad = bad;

			CalculateProbabilities();
		}

		/// <summary>
		/// Initialize the SpamFilter based on a DataTable containing columns "IsSpam" and "Body".
		/// This is only useful to me the author, but hey, it's my code so I can do what I want!
		/// </summary>
		/// <param name="table">Table with a boolean "IsSpam" column and a text "Body" column</param>
		public void Load(DataTable table)
		{
			_good = new Corpus();
			_bad = new Corpus();

			foreach (DataRow row in table.Rows)
			{
				bool isSpam = (bool)row["IsSpam"];
				string body = row["Body"].ToString();
				if (isSpam)
				{
					_bad.LoadFromReader(new StringReader(body));
				}
				else
				{
					_good.LoadFromReader(new StringReader(body));
				}
			}

			CalculateProbabilities();
		}

		/// <summary>
		/// Do the math to populate the probabilities collection.
		/// Every token seen in either corpus gets (at most) one entry.
		/// </summary>
		private void CalculateProbabilities()
		{
			_prob = new SortedDictionary<string, double>();

			_ngood = _good.Tokens.Count;
			_nbad = _bad.Tokens.Count;
			foreach (string token in _good.Tokens.Keys)
			{
				CalculateTokenProbability(token);
			}
			foreach (string token in _bad.Tokens.Keys)
			{
				// skip tokens already scored in the good-corpus pass
				if (!_prob.ContainsKey(token))
				{
					CalculateTokenProbability(token);
				}
			}
		}

		/// <summary>
		/// For a given token, calculate the probability that it will appear in a spam text
		/// by comparing the number of good and bad texts it appears in already.
		/// Tokens seen fewer than Knobs.MinCountForInclusion times (weighted) are skipped.
		/// </summary>
		/// <param name="token">The token to score; result is stored in _prob</param>
		private void CalculateTokenProbability(string token)
		{
			/*
			 * This is a direct implementation of Paul Graham's algorithm from
			 * http://www.paulgraham.com/spam.html
			 * 
			 *	(let ((g (* 2 (or (gethash word good) 0)))
			 *		  (b (or (gethash word bad) 0)))
			 *	   (unless (< (+ g b) 5)
			 *		 (max .01
			 *			  (min .99 (float (/ (min 1 (/ b nbad))
			 *								 (+ (min 1 (/ g ngood))   
			 *									(min 1 (/ b nbad)))))))))
			 */

			// good occurrences are weighted up to bias against false positives
			int g = _good.Tokens.ContainsKey(token) ? _good.Tokens[token] * Knobs.GoodTokenWeight : 0;
			int b = _bad.Tokens.ContainsKey(token) ? _bad.Tokens[token] : 0;

			if (g + b < Knobs.MinCountForInclusion)
			{
				return;		// not enough evidence to score this token
			}

			double prob;
			if (g == 0)
			{
				// special case for Spam-only tokens.
				// .9998 for tokens only found in spam, or .9999 if found more than 10 times
				prob = (b > Knobs.CertainSpamCount) ? Knobs.CertainSpamScore : Knobs.LikelySpamScore;
			}
			else
			{
				// g > 0 implies _ngood > 0, so goodfactor > 0 and the division below is safe.
				// Guard _nbad to avoid 0/0 => NaN when the bad corpus is empty.
				double goodfactor = Math.Min(1.0, (double)g / (double)_ngood);
				double badfactor = (_nbad > 0) ? Math.Min(1.0, (double)b / (double)_nbad) : 0.0;

				prob = Math.Max(Knobs.MinScore,
							Math.Min(Knobs.MaxScore, badfactor / (goodfactor + badfactor))
						);
			}

			_prob[token] = prob;
		}
#endregion

#region serialization
		/// <summary>
		/// Dumps the probability list to a file, preceded by a line containing good, bad and probability counts.
		/// Values are written with the invariant culture so files round-trip regardless of locale.
		/// </summary>
		/// <param name="filePath">Path of the file to (over)write</param>
		public void ToFile(string filePath)
		{
			using (StreamWriter writer = new StreamWriter(filePath, false))
			{
				writer.WriteLine(String.Format(CultureInfo.InvariantCulture, "{0},{1},{2}", _ngood, _nbad, _prob.Count));
				foreach (KeyValuePair<string, double> entry in _prob)
				{
					// format: probability,token -- token goes last because it may itself contain commas
					writer.WriteLine(String.Format(CultureInfo.InvariantCulture, "{0},{1}",
						entry.Value.ToString("#.#####", CultureInfo.InvariantCulture), entry.Key));
				}
			}
		}

		/// <summary>
		/// Populate from a file created with ToFile().
		/// </summary>
		/// <param name="filePath">Path of a file previously written by ToFile()</param>
		public void FromFile(string filePath)
		{
			_prob = new SortedDictionary<string, double>();
			using (StreamReader reader = new StreamReader(filePath))
			{
				ParseCounts(reader.ReadLine());

				while (!reader.EndOfStream)
				{
					ParseProb(reader.ReadLine());
				}
			}
		}

		/// <summary>
		/// Parse the "ngood,nbad,count" header line.  Tolerates a null line (empty file).
		/// </summary>
		private void ParseCounts(string line)
		{
			if (line == null)
			{
				return;
			}
			string[] tokens = line.Split(',');
			if (tokens.Length > 1)
			{
				_ngood = Convert.ToInt32(tokens[0], CultureInfo.InvariantCulture);
				_nbad = Convert.ToInt32(tokens[1], CultureInfo.InvariantCulture);
			}
		}

		/// <summary>
		/// Parse a "probability,token" line into the probability map.
		/// </summary>
		private void ParseProb(string line)
		{
			if (line == null)
			{
				return;
			}
			// split on the FIRST comma only -- the token itself may contain commas
			string[] tokens = line.Split(new char[] { ',' }, 2);
			if (tokens.Length > 1)
			{
				// indexer (not Add) so a duplicated line overwrites instead of throwing
				_prob[tokens[1]] = Convert.ToDouble(tokens[0], CultureInfo.InvariantCulture);
			}
		}

#endregion

		#region spam testing
		/// <summary>
		/// Returns the probability that the supplied body of text is spam
		/// </summary>
		/// <param name="body">The text to classify</param>
		/// <returns>Probability in [0,1] that the text is spam; 0.5 if no known tokens are found</returns>
		public double Test(string body)
		{
			SortedList probs = new SortedList();

			// Spin through every word in the body and look up its individual spam probability.
			// Keep the list in descending order of "interestingness".
			Regex re = new Regex(Corpus.TokenPattern, RegexOptions.Compiled);
			Match m = re.Match(body);
			int index = 0;
			while (m.Success)
			{
				string token = m.Groups[1].Value;
				double prob;
				if (_prob.TryGetValue(token, out prob))
				{
					// "interestingness" == how far our score is from 50%.
					// The crazy math below is building a string that lets us sort alphabetically
					// by interestingness; the index suffix keeps keys unique.
					string key = (0.5 - Math.Abs(0.5 - prob)).ToString(".00000", CultureInfo.InvariantCulture) + token + index++;
					probs.Add(key, prob);
				}

				m = m.NextMatch();
			}

			/* Combine the Knobs.InterestingWordCount most interesting probabilities into one.
			 * The algorithm to do this is shown below and described here:
			 * http://www.paulgraham.com/naivebayes.html
			 * 
			 *				abc           
			 *	---------------------------
			 *	abc + (1 - a)(1 - b)(1 - c)
			 *
			 */

			double mult = 1;  // for holding abc..n
			double comb = 1;  // for holding (1 - a)(1 - b)(1 - c)..(1-n)
			index = 0;
			foreach (string key in probs.Keys)
			{
				double prob = (double)probs[key];
				mult = mult * prob;
				comb = comb * (1 - prob);

				Debug.WriteLine(index + " " + probs[key] + " " + key);

				// >= so we consume exactly InterestingWordCount entries, not one extra
				if (++index >= Knobs.InterestingWordCount)
					break;
			}

			// with no matched tokens this is 1 / (1 + 1) == 0.5: "don't know"
			return mult / (mult + comb);
		}
		#endregion
	}
}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
麻豆国产精品一区二区三区| 成人一级片网址| 亚洲欧美日韩在线不卡| 国产精品免费看片| 国产丝袜在线精品| 国产欧美一区二区三区网站| 欧美不卡一区二区| 欧美精品丝袜久久久中文字幕| 日本高清无吗v一区| 色综合久久综合网欧美综合网| eeuss国产一区二区三区| 99久久综合精品| 欧亚一区二区三区| 欧美精品乱码久久久久久按摩| 欧美日韩国产成人在线免费| 91精品国产色综合久久| 精品少妇一区二区三区在线视频| 欧美一区二区三区四区久久| 日韩一级精品视频在线观看| 久久久久国产一区二区三区四区 | 一本色道久久综合亚洲aⅴ蜜桃| 99久久99久久综合| 欧美午夜影院一区| 久久麻豆一区二区| 成人性视频网站| 99视频精品免费视频| 91福利国产成人精品照片| 制服视频三区第一页精品| 日韩一二三区视频| 国产亚洲精品精华液| 中文字幕在线不卡视频| 夜夜操天天操亚洲| 日韩高清电影一区| 国产一区二区三区四| 99精品久久只有精品| 欧美视频一区二区三区在线观看| 91精品国产aⅴ一区二区| 久久蜜臀中文字幕| 亚洲精品成a人| 男人的j进女人的j一区| 春色校园综合激情亚洲| 欧美在线观看视频在线| 欧美成人综合网站| 中文字幕成人在线观看| 亚洲国产综合人成综合网站| 国内精品写真在线观看| 91视视频在线观看入口直接观看www| 欧美色区777第一页| 久久奇米777| 一区二区三区**美女毛片| 青青草精品视频| voyeur盗摄精品| 91精品国产麻豆| 中文字幕亚洲电影| 日韩av网站免费在线| 成人精品亚洲人成在线| 91精品在线麻豆| 秋霞影院一区二区| 亚洲精品大片www| 国产一区二区剧情av在线| 欧美在线你懂得| 国产亚洲欧洲一区高清在线观看| 亚洲欧美电影一区二区| 狠狠久久亚洲欧美| 欧美视频日韩视频| 国产日韩一级二级三级| 天涯成人国产亚洲精品一区av| 国产成人福利片| 欧美一区二区在线免费观看| |精品福利一区二区三区| 九九国产精品视频| 欧美私人免费视频| 1区2区3区精品视频| 国内外精品视频| 欧美日本不卡视频| 亚洲人吸女人奶水| 国产精品123| 日韩欧美中文字幕精品| 亚洲国产美女搞黄色| 99久久er热在这里只有精品15| 久久综合色天天久久综合图片| 视频在线观看一区| 色婷婷国产精品| 国产精品毛片a∨一区二区三区| 麻豆精品一区二区综合av| 欧美日韩黄视频| 亚洲精品乱码久久久久久| 成人国产精品免费观看视频| 久久久亚洲高清| 久久99精品国产麻豆婷婷| 欧美日韩一区小说| 一区二区在线免费观看| jizz一区二区| 中文字幕欧美国产| 精品视频在线免费观看| 欧美性受xxxx黑人xyx| 亚洲视频综合在线| 91美女福利视频| 最新国产の精品合集bt伙计| 粉嫩aⅴ一区二区三区四区| 日韩免费看的电影| 麻豆传媒一区二区三区| 日韩一区二区在线观看视频播放 | 99在线精品一区二区三区| 国产亚洲制服色| 国内精品伊人久久久久av影院| 欧美高清视频一二三区| 日韩激情在线观看| 宅男在线国产精品| 日韩国产欧美在线观看| 日韩一区二区在线免费观看| 久久91精品国产91久久小草| 欧美va亚洲va| 国产成人自拍网| 中文字幕 久热精品 视频在线| 国产成人亚洲综合色影视| 国产精品丝袜在线| aaa欧美色吧激情视频| 又紧又大又爽精品一区二区| 91行情网站电视在线观看高清版| 亚洲制服欧美中文字幕中文字幕| 色婷婷久久综合| 亚洲 欧美综合在线网络| 欧美一级黄色片| 国产乱人伦偷精品视频免下载| 久久久精品蜜桃| 9i在线看片成人免费| 一区二区视频在线| 欧美日韩三级一区| 另类小说欧美激情| 国产欧美一区二区精品婷婷| av日韩在线网站| 亚洲欧美激情小说另类| 91国产视频在线观看| 午夜a成v人精品| 精品剧情在线观看| 成人在线一区二区三区| 亚洲精选免费视频| 欧美一区三区二区| 高清不卡一区二区在线| 尤物在线观看一区| 欧美不卡一区二区三区四区| 成人精品一区二区三区中文字幕| 亚洲精品免费视频| 日韩一级片在线播放| 波多野结衣精品在线| 亚洲va欧美va人人爽午夜| 337p粉嫩大胆色噜噜噜噜亚洲| 91免费在线播放| 麻豆国产欧美日韩综合精品二区 | 国产麻豆视频一区| 亚洲婷婷在线视频| 欧美日韩在线三级| 国产精品一区不卡| 亚洲午夜一区二区三区| 久久色成人在线| 欧美综合一区二区三区| 国产在线一区观看| 午夜在线电影亚洲一区| 国产色产综合产在线视频| 欧美午夜一区二区| 懂色av一区二区夜夜嗨| 午夜欧美2019年伦理| 亚洲国产经典视频| 7777女厕盗摄久久久| 成人美女在线观看| 美女视频黄久久| 
亚洲嫩草精品久久| 国产亚洲一区二区三区在线观看 | 久久九九久久九九| 欧美日韩三级在线| 91色porny蝌蚪| 日韩电影在线观看电影| 亚洲视频免费在线| 久久久不卡网国产精品一区| 欧美日韩1区2区| 一本一本久久a久久精品综合麻豆| 韩国视频一区二区| 视频在线观看一区| 亚洲小少妇裸体bbw| 国产精品网曝门| 精品免费一区二区三区| 欧美日韩性生活| 91麻豆国产精品久久| 国产成人一级电影| 国产一区二区影院| 秋霞国产午夜精品免费视频| 亚洲国产精品久久久久婷婷884 | 亚洲综合男人的天堂| 国产精品丝袜一区| 久久综合999| 欧美电影精品一区二区| 欧美高清hd18日本| 欧美在线短视频| 色狠狠桃花综合| 99久久综合国产精品| www.性欧美| av一区二区三区在线| 粗大黑人巨茎大战欧美成人| 国产盗摄女厕一区二区三区|