亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

歡迎來到蟲蟲下載站! | 資源下載 | 資源專輯 | 關於我們
? 蟲蟲下載站

?? segmenter.pl

中文分詞算法。Perl語言編寫。wordlist.txt為詞庫。
?? PL
字號:
#!/usr/bin/perl

# Read in the lexicon.
#
# Every line of wordlist.txt is one word and is stored in %cwords with the
# value 1.  For a word whose length() is 6, its first 4 units are also stored
# with the value 2 (unless that key already exists); segmentline() treats a
# value of 2 as "prefix of a longer entry" rather than a word of its own.
# NOTE(review): the 4/6 arithmetic presumably assumes a double-byte encoding
# (2 bytes per Chinese character) -- confirm the encoding of wordlist.txt.
open(my $wordlist_fh, "<", "wordlist.txt")
    or die "Can't open wordlist: $!\n";
while (my $word = <$wordlist_fh>) {
    chomp $word;
    # Tolerate CRLF line endings; otherwise every key would carry a "\r"
    # and never match the text being segmented.
    $word =~ s/\r\z//;
    $cwords{$word} = 1;
    if (length($word) == 6) {
        my $prefix = substr($word, 0, 4);
        $cwords{$prefix} = 2 unless exists $cwords{$prefix};
    }
    # Prefix marking for longer words was already disabled in the original
    # and is kept here, disabled, for reference:
#    if (length($word) == 8) {
#	if (!exists($cwords{substr($word, 0, 4)})) {
#	    $cwords{substr($word, 0, 4)} = 2;
#	}
#	if (!exists($cwords{substr($word, 0, 6)})) {
#	    $cwords{substr($word, 0, 6)} = 2;
#	}
#    }
#    if (length($word) == 10) {
#	if (!exists($cwords{substr($word, 0, 4)})) {
#	    $cwords{substr($word, 0, 4)} = 2;
#	}
#	if (!exists($cwords{substr($word, 0, 6)})) {
#	    $cwords{substr($word, 0, 6)} = 2;
#	}
#	if (!exists($cwords{substr($word, 0, 8)})) {
#	    $cwords{substr($word, 0, 8)} = 2;
#	}
#    }
}
close($wordlist_fh);

# Numbers: characters that may occur inside a numeric expression.
# The string is cut into consecutive 2-unit slices and each slice becomes a
# key of %cnumbers (value 1).
# NOTE(review): the 2-unit stride presumably assumes a double-byte source
# encoding; the single-byte digits and "." in the first literal look like
# transcoded full-width characters -- confirm against the upstream file.
$numbers = join("",
    "零○一二三四五六七八九十百千萬億0123456789.點第",
    "多半數幾倆卅兩壹貳叁肆伍陸柒捌玖拾伯仟",
);
$n = 0;
while ($n < length($numbers)) {
    $cnumbers{substr($numbers, $n, 2)} = 1;
    $n += 2;
}

# Wide ASCII words: letters (plus "." and "-") that may be chained into one
# token.  The string is cut into consecutive 2-unit slices, each of which
# becomes a key of %cascii (value 1).
# NOTE(review): the literals below are plain single-byte ASCII, so the keys
# end up being letter pairs such as "ab"; presumably these were full-width
# characters before transcoding, and the empty third literal may have held
# more of them -- confirm against the upstream file.
$wascii = join("",
    "abcdefghijklmnopqrstuvwxyz.",
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ-",
    "",
);
$n = 0;
while ($n < length($wascii)) {
    $cascii{substr($wascii, $n, 2)} = 1;
    $n += 2;
}

# Foreign name transliteration characters.
# Each consecutive 2-unit slice of the string becomes a key of %cforeign
# (value 1); segmentline() uses the table to keep runs of such characters
# together.
# NOTE(review): the 2-unit stride presumably assumes a double-byte source
# encoding -- confirm.
$foreign = join("",
    "阿克拉加內亞斯貝巴爾姆愛蘭尤利西詹喬伊費杰羅納布可夫福赫勒柯特",
    "勞倫坦史芬尼根登都伯林伍泰胥黎俄科索沃金森奧霍瓦茨普蒂塞維大利",
    "格萊德岡薩雷墨哥弗庫澳馬哈多茲戈烏奇切諾戴里諸塞吉基延科達塔博",
    "卡雅來莫波艾哈邁蓬安盧什比摩曼乃休合賴米那迪凱萊溫帕桑佩蒙博托",
    "謝格澤洛及希卜魯匹齊茲印古埃努烈達累法賈圖喀土穆腓基冉休蓋耶沙",
    "遜賓麥華萬",
);
$n = 0;
while ($n < length($foreign)) {
    $cforeign{substr($foreign, $n, 2)} = 1;
    $n += 2;
}

# Chinese surnames.
# $surname holds the common single-character surnames and $uncommonsurname
# the ones kept in a separate table.  Each consecutive 2-unit slice becomes
# a key (value 1) of %csurname or %uncommoncsurname respectively.
# NOTE(review): the 2-unit stride presumably assumes a double-byte source
# encoding -- confirm.
$surname = join("",
    "艾安敖白班包寶保鮑貝畢邊卞柏卜蔡曹岑柴昌常陳成程遲池褚楚",
    "儲淳崔戴刀鄧狄刁丁董竇杜端段樊范方房斐費豐封馮鳳伏福傅蓋甘",
    "高戈耿龔宮勾茍辜谷古顧官關管桂郭韓杭郝禾何賀赫衡洪侯胡花",
    "華黃霍稽姬吉紀季賈簡翦姜江蔣焦晉金靳荊居康柯空孔匡鄺況賴藍",
    "郎朗勞樂雷冷黎李理厲利勵連廉練良梁廖林凌劉柳隆龍樓婁盧呂魯",
    "陸路倫羅洛駱麻馬麥滿茅毛梅孟米苗繆閔明莫牟穆倪聶牛鈕農潘龐",
    "裴彭皮樸平蒲溥浦戚祁齊錢強喬秦丘邱仇裘屈瞿權冉饒任榮容阮",
    "瑞芮薩賽沙單商邵佘申沈盛石史壽舒斯宋蘇孫邰譚談湯唐陶滕",
    "田佟仝屠涂萬汪王危韋魏衛蔚溫聞翁巫鄔伍武吳奚習夏鮮冼",
    "項蕭解謝辛邢幸熊徐許宣薛荀顏閻言嚴彥晏燕楊陽姚葉蟻易殷銀尹",
    "應英游尤於魚虞俞余禹喻郁尉元袁岳云臧曾查翟詹湛張章招趙甄",
    "鄭鐘周諸朱竺祝莊卓宗鄒祖左",
);
$uncommonsurname = "車和全時水同文席于";

$n = 0;
while ($n < length($surname)) {
    $csurname{substr($surname, $n, 2)} = 1;
    $n += 2;
}
$n = 0;
while ($n < length($uncommonsurname)) {
    $uncommoncsurname{substr($uncommonsurname, $n, 2)} = 1;
    $n += 2;
}

# Two-character surnames.  Each one is registered as a surname and is also
# put into the lexicon (value 1) so that it is segmented as a single unit.
for my $compound (
    "東郭", "公孫", "皇甫", "慕容", "歐陽", "單于",
    "司空", "司馬", "司徒", "澹臺", "諸葛",
) {
    $csurname{$compound} = 1;
    $cwords{$compound}   = 1;
}

# Not in name: characters and punctuation that add_ChineseNames() refuses to
# accept as part of a given name.  Each consecutive 2-unit slice of the
# string becomes a key of %cnotname (value 1).
# NOTE(review): the 2-unit stride presumably assumes a double-byte source
# encoding; the single-byte punctuation in the second literal looks like
# transcoded full-width punctuation -- confirm against the upstream file.
$notname = join("",
    "的說對在和是被最所那這有將會與於他為",
    "、:,。★〖〗()⊙~【】—·?!“” ",
);
$n = 0;
while ($n < length($notname)) {
    $cnotname{substr($notname, $n, 2)} = 1;
    $n += 2;
}


# Private helper: starting at $pos, collect the maximal run of characters
# that are whitespace ($want_space true) or non-whitespace ($want_space
# false).  Returns the run (possibly empty) and the position just past it.
sub _take_run {
    my ($text, $pos, $want_space) = @_;
    my $len = length($text);
    my $run = "";
    while ($pos < $len) {
        my $ch = substr($text, $pos, 1);
        my $is_space = ($ch =~ /\s/) ? 1 : 0;
        last if $is_space != ($want_space ? 1 : 0);
        $run .= $ch;
        $pos++;
    }
    return ($run, $pos);
}

# Join surname + given-name tokens in an already segmented line.
#
# Input : a line whose tokens are separated by whitespace.
# Output: the same line with recognised names glued together.
#
# When a token is a key of %csurname or %uncommoncsurname, the next two
# tokens are inspected:
#   * both are Chinese, of length 2 and not in %cnotname
#       -> surname + both tokens are joined; the full name is added to
#          %cwords with value 1 and surname + first token with value 2;
#   * only the first qualifies
#       -> surname + first token are joined and added to %cwords (1);
#   * the first is Chinese, of length 4 and not in %cnotname (for an
#     uncommon surname it must additionally not be a lexicon word)
#       -> surname + token are joined and added to %cwords (1), and
#          surname + the token's first 2 units are added with value 2;
#   * otherwise the text is copied through unchanged.
#
# All working variables are lexical, so the caller's package variables
# ($m, $tchar, $newline, ...) are no longer overwritten.
#
# NOTE(review): the two look-ahead tokens are consumed even when no name is
# formed, so they are never examined as surnames themselves.  That is the
# original behaviour and is kept; confirm whether it is intended.
sub add_ChineseNames {
    my ($tmpline) = @_;
    $tmpline = "" unless defined $tmpline;

    my $tlen    = length($tmpline);
    my $newline = "";
    my $pos     = 0;

    while ($pos < $tlen) {
        my $ch = substr($tmpline, $pos, 1);

        # Whitespace is copied through one character at a time.
        if ($ch =~ /\s/) {
            $newline .= $ch;
            $pos++;
            next;
        }

        my ($family, $gap1, $given1, $gap2, $given2);
        ($family, $pos) = _take_run($tmpline, $pos, 0);

        my $common   = defined($csurname{$family});
        my $uncommon = defined($uncommoncsurname{$family});
        unless ($common or $uncommon) {
            $newline .= $family;
            next;
        }

        # Found a surname: look at the two tokens that follow it.
        ($gap1,   $pos) = _take_run($tmpline, $pos, 1);
        ($given1, $pos) = _take_run($tmpline, $pos, 0);
        ($gap2,   $pos) = _take_run($tmpline, $pos, 1);
        ($given2, $pos) = _take_run($tmpline, $pos, 0);

        my $single1 = (isChinese($given1) and length($given1) == 2
                       and !defined($cnotname{$given1}));
        my $single2 = (isChinese($given2) and length($given2) == 2
                       and !defined($cnotname{$given2}));
        my $double1 = (isChinese($given1) and length($given1) == 4
                       and !defined($cnotname{$given1}));
        my $known_word = (exists($cwords{$given1}) and $cwords{$given1} == 1);

        if ($single1 and $single2) {
            # surname + two single-character tokens
            $newline .= $family . $given1 . $given2;
            $cwords{$family . $given1 . $given2} = 1;
            $cwords{$family . $given1} = 2;    # short version for checking
        }
        elsif ($single1) {
            # surname + one single-character token
            $newline .= $family . $given1 . $gap2 . $given2;
            $cwords{$family . $given1} = 1;
        }
        elsif ($double1 and ($common or !$known_word)) {
            # surname + one token of length 4; an uncommon surname only
            # counts when that token is not itself a lexicon word
            $newline .= $family . $given1 . $gap2 . $given2;
            $cwords{$family . $given1} = 1;
            $cwords{$family . substr($given1, 0, 2)} = 2;    # short version to check
        }
        else {
            $newline .= $family . $gap1 . $given1 . $gap2 . $given2;
        }
    }

    return $newline;
}


#sub cword_start {
#    my($tword) = @_;
#    if (grep(/^$tword/, @cwordlist) > 0) {
#	return 1;
#    } else {
#	return 0;
#    }
#}

# Return 1 when the string contains no character below 128 (i.e. no plain
# ASCII), 0 otherwise.  An empty string yields 1, as before.
#
# The original used the package variable $b as its loop index; $b is the
# special variable used by sort(), so calling this function could corrupt a
# caller's comparison.  A single pattern match needs no index at all.
sub isChinese {
    my ($cchar) = @_;
    return 1 unless defined $cchar;
    return ($cchar =~ /[\x00-\x7F]/) ? 0 : 1;
}


# Return 1 when every consecutive 2-unit slice of the argument is a key of
# %cnumbers, 0 otherwise.  An empty argument yields 1.
# The argument and loop index are lexical so that the caller's $k and
# $localnum package variables are left alone.
sub allnum {
    my ($localnum) = @_;
    for (my $k = 0; $k < length($localnum); $k += 2) {
        return 0 unless defined($cnumbers{substr($localnum, $k, 2)});
    }
    return 1;
}

# Return 1 when every consecutive 2-unit slice of the argument is a key of
# %cascii, 0 otherwise.  An empty argument yields 1.
# The argument and loop index are lexical so that the caller's $k and
# $localstr package variables are left alone.
sub allwascii {
    my ($localstr) = @_;
    for (my $k = 0; $k < length($localstr); $k += 2) {
        return 0 unless defined($cascii{substr($localstr, $k, 2)});
    }
    return 1;
}

# Return 1 when every consecutive 2-unit slice of the argument is a key of
# %cforeign, 0 otherwise.  An empty argument yields 1.
# The argument and loop index are lexical so that the caller's $k and
# $localstr package variables are left alone.
sub allforeign {
    my ($localstr) = @_;
    for (my $k = 0; $k < length($localstr); $k += 2) {
        return 0 unless defined($cforeign{substr($localstr, $k, 2)});
    }
    return 1;
}


# Segment one line of text and return it with spaces between the words.
#
# The line is walked unit by unit.  A unit whose value is above 127 is taken
# together with the following unit as one 2-unit character; such characters
# are accumulated into a word as long as one of these holds:
#   1. accumulated word + character is a lexicon entry (%cwords value 1);
#   2. the accumulated word is all numbers and the character is a number;
#   3. the accumulated word is all wide ASCII and so is the character;
#   4. the accumulated word is all foreign-name characters, so is the
#      character, and the 4 units starting at the character are not a
#      lexicon entry (value 1 or 2);
#   5. accumulated word + character is a lexicon prefix (value 2) and adding
#      the next character as well gives a lexicon entry (value 1 or 2).
# Otherwise the accumulated word is emitted followed by a space and a new
# word starts.  Units up to 127 are copied through, flushing any pending
# word first.  The result is finally passed through add_ChineseNames().
#
# Changes from the original:
#   * the empty prototype "()" was removed -- the sub takes one argument,
#     and with the prototype a call such as segmentline($x) fails to compile
#     once the definition has been seen;
#   * a word still pending at the end of the line is now emitted instead of
#     being silently dropped (it used to survive only when the line ended in
#     an ASCII character such as "\n");
#   * all working variables are lexical instead of package globals.
# NOTE(review): the 2-unit handling presumably assumes a double-byte encoded
# byte string -- confirm what callers pass in.
sub segmentline {
    my ($line) = @_;
    $line = "" unless defined $line;

    my $chinaccum = "";
    my $outline   = "";
    my $linelen   = length($line);

    for (my $i = 0; $i < $linelen; $i++) {
        my $char1 = substr($line, $i, 1);

        if (unpack("C", $char1) <= 127) {
            # Plain ascii text: attach any accumulated Chinese, then the ascii.
            if ($chinaccum ne "") {
                $outline .= $chinaccum . " ";
                $chinaccum = "";
            }
            $outline .= $char1;
            next;
        }

        my $chinchar = substr($line, $i, 2);

        if ($chinaccum eq "") {
            $outline .= " " unless $i == 0;
            $chinaccum = $chinchar;
        }
        else {
            my $joined = $chinaccum . $chinchar;
            # Lexicon state of the candidate: 1 = word, 2 = prefix, 0 = absent.
            my $state  = $cwords{$joined} || 0;
            # State of the 4 units beginning at the current character.
            my $ahead  = $cwords{substr($line, $i, 4)} || 0;
            # The character after the current one ("" at end of line).
            my $next   = ($i + 2 <= $linelen) ? substr($line, $i + 2, 2) : "";
            my $longer = $cwords{$joined . $next} || 0;

            my $extend =
                   $state == 1
                || (allnum($chinaccum)    && defined($cnumbers{$chinchar}))
                || (allwascii($chinaccum) && defined($cascii{$chinchar}))
                || (allforeign($chinaccum) && defined($cforeign{$chinchar})
                    && $ahead != 1 && $ahead != 2)
                || ($state == 2 && ($longer == 1 || $longer == 2));

            if ($extend) {
                $chinaccum .= $chinchar;
            }
            else {
                $outline .= $chinaccum . " ";
                $chinaccum = $chinchar;    # start anew
            }
        }
        $i++;    # skip the second unit of the 2-unit character
    }

    # Emit a word that was still pending when the line ended.
    $outline .= $chinaccum if $chinaccum ne "";

    return add_ChineseNames($outline);
}

1;  # a file loaded with require must end by returning a true value

快捷鍵說明

複製代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
亚洲视频网在线直播| 日本女优在线视频一区二区| 亚洲精品日日夜夜| 日韩av中文字幕一区二区三区| 久草精品在线观看| 日韩一区二区三区观看| 久久精品一区二区三区四区| 亚洲男女一区二区三区| 国产一区二区三区国产| 欧美男生操女生| **网站欧美大片在线观看| 久久99久久精品欧美| 欧美亚洲日本国产| 亚洲欧洲韩国日本视频| 久久91精品久久久久久秒播| 欧美在线999| 亚洲天堂网中文字| 粉嫩绯色av一区二区在线观看| 91精品国产欧美一区二区成人| 日韩美女啊v在线免费观看| 国产精品中文欧美| 久久亚洲精华国产精华液| 日韩av电影一区| 欧美日韩国产成人在线免费| 玉足女爽爽91| eeuss鲁片一区二区三区在线观看| 欧美精品一区男女天堂| 久久机这里只有精品| 日韩一区二区三区在线观看| 午夜婷婷国产麻豆精品| 欧美午夜电影网| 亚洲欧美色综合| 97精品国产97久久久久久久久久久久| 亚洲国产精品av| 成人午夜电影小说| 中文字幕中文字幕一区二区| 成人深夜在线观看| 综合在线观看色| 色婷婷国产精品久久包臀| 亚洲精品视频一区| 欧美三级电影在线看| 视频在线观看一区二区三区| 在线成人av网站| 久久 天天综合| 国产色一区二区| 91麻豆精品91久久久久同性| 亚洲欧美aⅴ...| 欧美午夜精品一区| 日韩国产在线一| www一区二区| 成人av网在线| 亚洲成人三级小说| 日韩免费福利电影在线观看| 国产乱人伦偷精品视频免下载| 欧美国产日韩亚洲一区| 色婷婷精品久久二区二区蜜臂av| 亚洲a一区二区| 2019国产精品| 91丝袜国产在线播放| 日韩高清电影一区| 国产女主播一区| 欧美在线不卡一区| 精油按摩中文字幕久久| 国产精品国产自产拍高清av王其| 色婷婷av一区二区三区软件| 日韩国产欧美在线观看| 国产无一区二区| 欧美日韩一区精品| 高潮精品一区videoshd| 亚洲自拍与偷拍| 精品国产99国产精品| 99久久精品国产导航| 五月综合激情日本mⅴ| 久久人人爽爽爽人久久久| 91福利资源站| 日韩精品亚洲专区| 亚洲欧洲精品一区二区三区 | 裸体一区二区三区| 国产精品乱码人人做人人爱 | 欧美成人精品3d动漫h| 成人在线视频一区| 天天做天天摸天天爽国产一区| 精品国产精品一区二区夜夜嗨| 成人av资源网站| 捆绑调教美女网站视频一区| 亚洲最快最全在线视频| 国产视频亚洲色图| 欧美va亚洲va国产综合| 欧美日韩国产综合视频在线观看 | 看片网站欧美日韩| 亚洲免费av高清| 欧美国产精品久久| 日韩精品一区二区三区在线| 日本精品一区二区三区四区的功能| 热久久免费视频| 亚洲一区自拍偷拍| 1024国产精品| 精品盗摄一区二区三区| 欧美乱妇20p| 在线一区二区观看| 播五月开心婷婷综合| 国产麻豆精品在线| 精品一区二区综合| 日韩二区三区在线观看| 亚洲成人av福利| 亚洲美女一区二区三区| 中文字幕精品在线不卡| 久久嫩草精品久久久精品 | 91精品福利在线| 99国产精品国产精品久久| 成人看片黄a免费看在线| 久久电影网电视剧免费观看| 天天色综合天天| 午夜天堂影视香蕉久久| 亚洲综合一二三区| 亚洲精选视频在线| 亚洲手机成人高清视频| 中文字幕日本乱码精品影院| 中文字幕欧美日韩一区| 中文字幕免费不卡| 亚洲欧洲色图综合| 一区二区成人在线观看| 亚洲免费观看高清完整版在线观看熊| 日韩一区日韩二区| 亚洲另类中文字| 亚洲大片免费看| 午夜影院在线观看欧美| 日本欧美在线观看| 精品一区二区在线视频| 国产麻豆精品在线观看| 成人综合婷婷国产精品久久蜜臀 | 美女网站在线免费欧美精品| 捆绑调教一区二区三区| 国内精品自线一区二区三区视频| 国产一区二区三区电影在线观看 | 久久久久国产精品免费免费搜索| 国产日韩欧美精品电影三级在线| 国产精品久久久久永久免费观看| 亚洲欧洲av另类| 一区二区三区影院| 日本成人中文字幕在线视频| 国内精品国产三级国产a久久| 成年人国产精品| 欧美私人免费视频| 精品国产一区二区三区久久影院 | 一区二区三区色| 免费人成精品欧美精品| 国产精品一区二区你懂的| 91美女蜜桃在线| 日韩免费在线观看| 亚洲美女电影在线| 久久精品国产第一区二区三区| 大美女一区二区三区| 欧美精品一级二级| 国产视频视频一区| 亚洲成人激情社区| 成人综合日日夜夜| 91麻豆精品国产91久久久久| 欧美国产日韩在线观看| 日韩高清不卡一区二区三区| 成人a区在线观看| 精品精品国产高清一毛片一天堂| ...xxx性欧美| 国产麻豆视频精品| 欧美日韩国产不卡| 中文字幕亚洲综合久久菠萝蜜| 免费成人小视频| 
91成人国产精品| 亚洲欧洲美洲综合色网| 久久99国产精品久久99果冻传媒| 91首页免费视频| 中文在线一区二区| 精久久久久久久久久久| 欧美色窝79yyyycom| 自拍偷自拍亚洲精品播放| 精品一区二区三区免费播放 | 99久久综合色| 久久午夜免费电影| 日本va欧美va精品发布| 欧美性受xxxx黑人xyx性爽| 国产精品美女久久久久久久网站| 免费高清成人在线| 欧美一区二视频| 丝袜美腿亚洲一区| 欧洲精品一区二区| 亚洲猫色日本管| 91在线视频播放地址| 久久久久久久久一| 看片网站欧美日韩| 精品国产1区2区3区| 日本不卡一区二区三区高清视频| 欧美色图天堂网| 亚洲一区二区三区小说| 色妹子一区二区| 亚洲视频香蕉人妖| 99久久er热在这里只有精品15| 国产精品久久久久一区二区三区共| 日本精品免费观看高清观看| 欧美国产成人精品| 成人午夜av电影|