emclust.m
%EMCLUST Expectation-Maximization clustering
%
%   [LABELS,W_EM] = EMCLUST (A,W_CLUST,K,LABTYPE,FID)
%
% INPUT
%   A        Dataset, possibly labeled
%   W_CLUST  Cluster model mapping, untrained (default: nmc)
%   K        Number of clusters (default: number of classes of A,
%            or 2 if A has a single class)
%   LABTYPE  Label type: 'crisp' or 'soft' (default: label type of A)
%   FID      File ID to write progress to (default: [], see PRPROGRESS)
%
% OUTPUT
%   LABELS   Integer labels for the objects in A pointing to their cluster
%   W_EM     EM clustering mapping
%
% DESCRIPTION
% The untrained classifier mapping W_CLUST is used to update an initially
% labeled dataset A by iterating the following two steps:
%   1. Train   W : W_EM = A*W_CLUST
%   2. Relabel A : A = dataset(A,labeld(A*W_EM*classc))
% This is repeated until the labeling does not change anymore. The final
% crisp labeling is returned in LABELS. W_EM may be used for assigning
% new objects.
%
% If K is given, a random initialisation for K clusters is made and the
% labels of A are neglected.
%
% LABTYPE determines the type of labeling: 'crisp' or 'soft'. Default:
% label type of A. It is assumed that W_CLUST can handle the requested
% LABTYPE. Only if LABTYPE is 'soft' is the traditional EM algorithm
% followed; if LABTYPE is 'crisp', EMCLUST follows a generalised k-means
% algorithm.
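%
% EXAMPLES
% An illustrative sketch, not part of the original file; GENDATB, the set
% sizes and the number of clusters below are arbitrary choices:
%
%   a = +gendatb([50 50]);          % 100 unlabeled 2D objects
%   [lab,w_em] = emclust(a,nmc,3);  % find 3 clusters with NMC models
%   scatterd(dataset(a,lab))        % inspect the resulting clustering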
%
% SEE ALSO
% MAPPINGS, DATASETS, KMEANS, PRPROGRESS

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% $Id: emclust.m,v 1.23 2005/10/29 18:59:41 duin Exp $

function [new_lab,w_em] = emclust (a,w_clust,n,type,fid)

  prtrace(mfilename);

  n_ini   = 500;    % Maximum size of subset to use for initialisation.
  epsilon = 1e-6;   % Stop when average labeling change drops below this.

  % Check arguments.
  if (nargin < 5), fid = []; end
  if (nargin < 4)
    prwarning(3,'No label type specified, using label type of dataset A.');
    type = [];
  end
  if (nargin < 3) | isempty(n)
    prwarning(3,'No number of clusters specified, using number of classes in A.');
    n = [];
  end
  if (nargin < 2) | isempty(w_clust)
    prwarning(2,'No clustering mapping specified, assuming NMC.');
    w_clust = nmc;
  end

  isuntrained(w_clust);   % Assert that the clustering mapping is untrained.

  % Determine number of clusters N and initialisation method.
  a = dataset(a);
  islabtype(a,'crisp','soft');
  [m,k,c] = getsize(a);
  rand_init = 1;
  if (isempty(n))
    if (c == 1)
      n = 2;           % For one class, find two clusters.
    else
      n = c;
      rand_init = 0;   % Use the given classification as initialisation.
    end
  end

  if (n < 1), error('Number of clusters should be at least one.'); end
  if (n == 1), prwarning(4,'Clustering with 1 cluster is trivial.'); end

  % Set label type, if given.
  if ~isempty(type), a = setlabtype(a,type); end
  a = setprior(a,[]);   % Make sure that priors will be deleted.

  % Initialise by performing KCENTRES on...
  if (rand_init)
    if (m > n_ini)      % ... a random subset of A.
      prwarning(2,'Initializing by performing KCENTRES on a subset of %d samples.',n_ini);
      a_ini = +gendat(+a,n_ini);
    else                % ... the entire set A.
      prwarning(2,'Initializing by performing KCENTRES on the training set.');
      a_ini = +a;
    end
    not_found = 1; itern = 0;
    while (not_found)   % Try to find an initialisation with all class sizes > 1.
      itern = itern + 1;
      if (itern > 100)
        error('Not possible to find desired number of components')
      end
      assign = kcentres(+distm(a_ini),n);

      % Train an initial classifier on the labels generated by KCENTRES and
      % find initial hard labels. Use NMC instead of W_CLUST to make sure
      % that we always have enough data to estimate the parameters.
      a_ini = dataset(a_ini,assign);
      a_ini = setprior(a_ini,getprior(a_ini,0));
      d = a*(a_ini*nmc);
      if (islabtype(a,'soft'))
        new_lab = +d;
        not_found = 0;
      else
        new_lab = d*labeld;
        if all(classsizes(dataset(d,new_lab)) > 1)
          not_found = 0;
        end
      end
    end
  else
    a = setlablist(a,[1:c]');
    new_lab = getlabels(a);   % Use the given labeling.
  end

  % Ready for the work.
  prprogress(fid,'\nemclust optimization\n')
  iter = 0;
  if (islabtype(a,'soft'))
    change = 1;
    a = setlabels(a,new_lab);
    a = setprior(a,getprior(a,0));
    lab = new_lab;
    while (change > epsilon)   % EM loop, run until the labeling is stable.
      w_em = a*w_clust;        % 1. Train classifier, density output.
      b = a*(w_em*classc);     % 2. Assign probabilities to training samples.
      a = settargets(a,b);     % 3. Insert probabilities as new soft labels.
      change = mean(mean((+b-lab).^2));
      lab = b;
      prprogress(fid,' change = %f (mse)\n',change);
      iter = iter+1;
      if (iter > 500)
        prwarning(1,'emclust stopped after 500 iterations')
        change = 0;
      end
    end
  else   % Crisp labels: generalised k-means.
    lab = ones(m,1); it = 0;
    while (any(lab ~= new_lab) & it < 50)   % EM loop, run until the labeling is stable.
      it = it + 1;
      a = setlabels(a,new_lab);        % 0. Set labels and store the old ones.
      a = setprior(a,getprior(a,0));   %    Set priors to class frequencies.
      lab = new_lab;
      % a = remclass(a,1);             % Demand class sizes > 2 objects.
      itern = 0;
      while (getsize(a,3) < n)   % Increase the number of classes if necessary.
        itern = itern + 1;
        if (itern > 100)
          error('Not possible to find desired number of components')
        end
        laba = getlablist(a);
        labmax = max(laba);
        N = classsizes(a);
        [Nmax,cmax] = max(N);        % Find the largest class, ...
        aa = seldat(a,cmax);         % ... select just that one, ...
        new_lab_aa = kmeans(aa,2);   % ... and split it by k-means.
        N1 = sum(new_lab_aa == 1);
        N2 = sum(new_lab_aa == 2);
        if (N1 > 1 & N2 > 1)   % Use the split if both parts have more than one sample.
          J = findlabels(a,laba(cmax,:));
          a = setlabels(a,new_lab_aa + labmax,J);
        end
      end
      w_em = a*w_clust;       % 1. Compute classifier, crisp output.
      b = a*w_em;             % 2. Classify training samples.
      new_lab = labeld(b);    % 3. Insert classification as new labels.
      prprogress(fid,' change = %d objects, no. of classes = %i\n', ...
        length(find(lab ~= new_lab)),length(unique(new_lab)));
      iter = iter+1;          % DXD: also count iterations for the crisp loop.
      if (iter > 500)
        prwarning(1,'emclust stopped after 500 iterations')
        change = 0;
      end
    end
  end
  prprogress(fid,'emclust finished\n')

return;
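% The trained mapping W_EM can subsequently label objects that were not
% used for clustering, as noted in the DESCRIPTION. A minimal sketch,
% continuing the assumptions of the EXAMPLES section above:
%
%   b = +gendatb([20 20]);   % previously unseen objects
%   d = b*w_em*classc;       % confidences per cluster
%   lab_b = labeld(d);       % crisp cluster assignments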