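% shang.m
% (code-viewer residue) Font size:
% Entropy-based attribute discretization method; see the paper
% "An entropy-based discretization method for continuous attributes".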
function [result,dds]=shang(data)
%SHANG Entropy-based discretisation of continuous attributes.
%   [result,dds] = shang(data) treats every column of the m-by-n matrix
%   DATA as one continuous attribute and discretises it independently.
%
%   For each column the candidate cut points are the midpoints between
%   consecutive distinct values. Cut points are then removed greedily: in
%   every round the cut whose removal yields the smallest interval entropy
%   is the merge candidate, and the merge is accepted while the criterion
%       Ck = k0*H_merged - H0*(number of cuts left after the merge)
%   does not decrease (k0 / H0 are the initial cut count / entropy).
%
%   Outputs
%     result  Cut points, one column per attribute, in ascending order and
%             padded with zeros. It is at least 10-by-5 and grows when
%             more rows/columns are needed. Because the padding value is
%             0, a genuine cut point equal to 0 cannot be told apart from
%             padding by looking at RESULT alone.
%     dds     m-by-n matrix of interval labels. A finite value x gets the
%             label 1 + (number of cut points <= x). Columns that end up
%             with no cut point, and non-finite entries, keep the label 0.

[m,n]=size(data);
result=zeros(10,max(5,n));   % legacy minimum size, always >= n columns
cutCount=zeros(1,n);         % real number of cut points kept per column

for i=1:n
    seqs=sort(data(:,i));

    % Distinct finite values; unique() removes every repeated value,
    % however long the run of equal samples is.
    se=unique(seqs);
    se=se(isfinite(se));
    se=se(:);

    % Candidate cut points: midpoints of neighbouring distinct values
    % (empty row vector when the column has fewer than two distinct values).
    setpt=((se(1:end-1)+se(2:end))/2).';

    k0=numel(setpt);                        % initial number of cut points
    H0=intervalEntropy(seqs,setpt,m);       % initial entropy

    % Ck(1): criterion of the last accepted state, Ck(2): criterion of the
    % merge being proposed. Both start at 0 so the loop is entered.
    Ck=zeros(1,2);
    ad=[];                                  % index of the proposed merge
    while Ck(2)-Ck(1)>=0
        Ck(1)=Ck(2);
        if ~isempty(ad)
            setpt(ad)=[];                   % commit the accepted merge
        end
        nn=numel(setpt);
        if nn==0
            break                           % nothing left to merge
        end

        % Entropy obtained by removing each remaining cut point in turn.
        H=zeros(1,nn);
        for c1=1:nn
            trial=setpt;
            trial(c1)=[];
            H(c1)=intervalEntropy(seqs,trial,m);
        end

        [Hmerged,ad]=min(H);
        Ck(2)=k0*Hmerged-H0*(nn-1);
    end

    nt=numel(setpt);
    cutCount(i)=nt;
    if nt>0
        result(1:nt,i)=setpt;
    end
end

% Map every value to the index of the interval it falls into.
dds=zeros(m,n);
for i=1:n
    if cutCount(i)==0
        continue                            % no cut points: labels stay 0
    end
    % Use the stored count, not "nonzero entries", so a cut at 0 survives.
    locd=result(1:cutCount(i),i);
    col=data(:,i);
    ok=isfinite(col);
    % Intervals are closed on the left: a value equal to a cut point
    % belongs to the interval that starts at that cut point.
    dds(ok,i)=1+sum(bsxfun(@ge,col(ok),locd.'),2);
end

function H=intervalEntropy(vals,cuts,m)
%INTERVALENTROPY Shannon entropy (base 2) of the interval frequencies.
%   VALS are the samples of one attribute, CUTS the ascending cut points
%   (may be empty) and M the total number of samples. The intervals are
%   (-inf,c1), [c1,c2), ..., [ck,inf).
edges=[-inf,cuts(:).',inf];
k=numel(edges)-1;
p=zeros(1,k);
for t=1:k
    if t==1
        inBin=vals>edges(1)&vals<edges(2);
    else
        inBin=vals>=edges(t)&vals<edges(t+1);
    end
    p(t)=sum(inBin)/m;
end
p=p(p>0);                                   % 0*log2(0) is taken as 0
H=-sum(p.*log2(p));
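% (code-viewer residue) Keyboard shortcuts
%   Copy code:            Ctrl + C
%   Search code:          Ctrl + F
%   Full-screen mode:     F11
%   Toggle theme:         Ctrl + Shift + D
%   Show shortcuts:       ?
%   Increase font size:   Ctrl + =
%   Decrease font size:   Ctrl + -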