?? pca.m
字號:
function [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method,reportflag)
%PCA - Principal Component Analysis of data matrix
% PCA performs principal component (EOF) analysis on input
% time series, using either the SVD method or the COVARIANCE
% MATRIX method, depending on the size of the problem. See Emery
% and Thompson or Preisendorfer for details.
%
% Input: D - a matrix of observation time series, one series
% per column. In the case that the columns represent
% different physical quantities, the varnorm flag below
% can be used to normlize the column variance, by
% dividing each series by it's demeaned std.
% meantrend - binary flag (0|1) specifying whether (or not)
% to remove mean and linear trends from the column
% data; the default is to remove both mean and
% linear trend.
% varnorm - binary flag (0|1) specifying whether (or not)
% the code should perform variance normaliation
% of the observation matrix D columns. The default
% is NOT to normalize the variance.
% k_rank - number of eigenvalues/vectors/functions to return.
% If k_rank>rank(D), then this option does nothing.
% If k_rank==0, then all eig's are returned; this is
% the default. Inputting k_rank < rank(D) amounts
% to returning the rank-k approximation to the data
% matrix. k_rank can also be the string 'SIG', which
% tells the routine to determine the number of
% statistically signifigant modes and return that
% k_rank approximation. Specifying k_rank to anything
% other than 0 forces the method to be 'SVD'.
% method - override the method selection; either 'COV','SVD'
% or 'SEL' to let the code select the method
% report - binary flag (0|1) to write a "report" to stdout
% of the "success" of the analysis. Default is no report.
%
% Output:
% evals - vector of eigenvalues of the covariance matrix (D'*D)
% evecs - matrix of eigenvectors of the covariance matrix
% efuns - matrix of eigenfunctions (amplitude functions, e.g.)
% edata - k_rank approximation to the data matrix; this matrix
% will only be returned if 0 < k_rank < rank(D).
%
% NOTES: 1) Use COMP_VAR_EL_PARAMS to rotate vector series onto principal
% axes if data resprsents vector series.
%
% Call as: [evals,evecs,efuns]=pca(D);
% OR: [evals,evecs,efuns]=pca(D,meantrend);
% OR: [evals,evecs,efuns]=pca(D,meantrend,varnorm);
% OR: [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank);
% OR: [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method);
% OR: [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method,report);
%
if nargin==0
disp(' Call as: [evals,evecs,efuns]=pca(D);');
disp(' [evals,evecs,efuns]=pca(D,meantrend);');
disp(' [evals,evecs,efuns]=pca(D,meantrend,varnorm);');
disp(' [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank);');
disp(' [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method);');
disp(' [evals,evecs,efuns,edata]=pca(D,meantrend,varnorm,k_rank,method,report);');
evals=[];evecs=[];efuns=[];
return
end
%% PROCESS INPUT ARGUMENTS
if isstr(D)&strcmp(lower(D),'refs')
disp('Preisendorfer, PCA in Meteorology and Oceanography, Elsevier, 1988.');
disp('Overland and Preisendorfer, Signifigance test for PCA, Monthly Weather Review, 1982.');
disp('Kelly, JPO, 1988');
disp('Emery and Thompson, Data Analysis Methods in Physical Oceanography, 1998.')
evals=[];evecs=[];efuns=[];edata=[];
return
end
if exist('meantrend')
if meantrend~=0 & meantrend~=1
error('MEANTREND flag to PCA must be 0|1.')
end
else
meantrend=1;
end
if exist('varnorm')
if varnorm~=0 & varnorm~=1
error('VARNORM flag to PCA must be 0|1.')
end
else
varnorm=0;
end
sig_test='no';
if exist('k_rank')
if isstr(k_rank)
if ~strcmp(lower(k_rank),'sig')
error('if K_RANK to PCA is a string, it must be the string ''SIG''.')
end
sig_test='yes';
end
if k_rank<0
error('K_RANK to PCA >=0 or the string ''SIG''.')
end
else
k_rank=0;
end
if exist('method')
if ~isstr(method)|((~strcmp(method,'COV'))&...
(~strcmp(method,'SVD'))&...
(~strcmp(method,'SEL')))
error('METHOD to PCA must be ''COV'' or ''SVD'' or ''SEL''')
end
else
method='SEL';
end
if isstr(k_rank),method='SVD';,end
% Check the number of outgoing arguments.
if nargout<4
error('Rank-k data matrix requested, but <4 output arguments provided.')
end
if exist('reportflag')
if reportflag~=0 & reportflag~=1
error('REPORT flag to PCA must be 0|1.')
end
else
reportflag=0;
end
% Get number of series (spatial points, variables, etc.) (M,
% column count) and length of observations (N, row count).
% Recall that the observation matrix is input with time
% proceeding down the rows, and location (variables) across the
% columns
[M,N]=size(D'); % NOTE THE TRANSPOSE !!!
% The relative sizes of M and N determine whether we compute
% the EOFs from the covariance matrix or whether we use the
% SVD method, unless overridden by user on input.
if strcmp(method,'SEL')
if M>N
method='SVD';
else
method='COV';
end
end
% Regardless of the method, we form a "data" matrix DT, which is
% de-meaned and linearly de-trended, if the user has specified
% meantrend==1.
DM=D;
if meantrend==1
% Demean data matrix D
if reportflag
disp(' ')
disp(['Before: Mean = ',num2str(mean(D))])
disp(['De-meaning, De-trending data'])
end
[DM,MEAN]=demean(DM);
% Some de-trending needs to happen here; atleast linear removal
% Compute linear trend for each variable
[DM,TRENDCOEFFS]=detrend2(DM);
end
% So far, observations have been de-meaned and linearly
% de-trended. If the observation series
% represent different physical quantities (temp and sal, for
% example), then the user should have specified varmorm==1, so ...
if varnorm
STD=std(DM);
if reportflag
disp(' ')
disp(['Normalizing variance.'])
disp(['Before: Min STD = ',num2str(min(STD))])
disp([' Max STD = ',num2str(max(STD))])
end
[DM,STDD]=normvar(DM);
if reportflag
STD=std(DM);
disp(['After : Min STD = ',num2str(min(STD))])
disp([' Max STD = ',num2str(max(STD))])
end
end
% At this point, we transpose the data matrix to put the
% time along the columns;
DM=DM';
switch method
case 'COV'
% Form DM times DM-transpose, DDT
DDT=(1/(N-1))*DM*DM';% This is the covariance matrix
flops(0);tic;
[U,d]=eig(DDT);
FLOPS=flops;
TOC=toc;
[evals,iperm]=sort(diag(d));
iperm=flipud(iperm); % Flip to get descending order
evals=flipud(evals);
evecs=U(:,iperm); % Permute the columns of U
efuns=DM'*evecs;
edata=[]; % Rank-k data cannot be returned if method=='COV'
case 'SVD'
flops(0);tic;
if reportflag,disp('Performing SVD...'),end
[U,S,V]=svd(DM);
FLOPS=flops;
TOC=toc;
% now, we compute the eigenvalues from the singular values;
% they are the squares of the SVs, scaled by the length of
% the time series -1, as in the covariance, matrix.
evals=(1/(N-1))*diag(S).^2;
% evecs are in U
evecs=U;
% Eigenfunctions
efuns=V*S';
DMrank=length(diag(S)>eps);
% Rank-k Approximation to the input data matrix
if isstr(k_rank) % then it must be 'SIG', and determine the
% statistically signifigant singular values.
if reportflag,disp('Determining signifigance...'),end
% set k_rank according to the following signifigance test.
% The sig. test is based on a Monte Carlo simulation.
% See Overland and Preisendorfer, Monthly Weather Review,
% Jan. '82, Vol. 110, No. 1 for details, and notation.
% Form the normalized eigenvalue statistic:
T=evals/sum(evals); % eqn 1
% Form 100 replicates of a length DMrank by
% M gaussian random noise matrix
for r=1:100
F=randn(DMrank,M);
delta(r,:)=(eig(F*F'))';
end
for r=1:100
UU(r,:)=delta(r,:)/sum(delta(r,:));
end
for j=1:M
UU(:,j)=sort(UU(:,j));
end
ik=find(T>UU(95,:)');
k_rank=ik(length(ik));
end
if k_rank>0 & (k_rank<=min(M,N))
%return the k_rank approximation to the input data matrix.
edata=(U(:,1:k_rank)*S(1:k_rank,1:k_rank)*V(:,1:k_rank)')';
if varnorm % add back in the std
edata=edata.*(ones(size(edata(:,1)))*STDD);
end
if meantrend % add back in the means and trends
tt=(1:length(edata(:,1)))';tt=tt-mean(tt);
edata=edata+tt*TRENDCOEFFS;
edata=edata+ones(size(edata(:,1)))*MEAN;
end
else % k_rank=0
edata=[];
end
end
if nargout==1 | nargout==0
efuns=[];evecs=[];
elseif nargout==2
efuns=[];
elseif nargout==3
edata=[];
end
if reportflag
disp(' ')
disp(['Method = ' upper(method)]);
disp(['Flops = ' int2str(FLOPS)]);
disp(['O(MNN) = ' int2str(M*N*N)]);
disp(['Time = ' num2str(TOC)]);
disp(['Matrix Rank = ' num2str(DMrank)]);
disp(' ')
if strcmp(sig_test,'yes')
disp(['Signifigant Rank (95%-conf level) = ' int2str(k_rank)])
end
if k_rank>0
disp(['Rank-' int2str(k_rank) ' data matrix approximation returned in 4th output argument.'])
disp(' ')
end
maxerr=max(max(abs(evecs*evecs')-eye(size(evecs))));
disp(['Max error in orthogonality of eigenvectors: ' num2str(maxerr)])
disp(' ')
disp(' Variance Table')
if strcmp(sig_test,'yes')
disp(' (** indicates signifigance)')
end
disp(' Mode E-val %Var ')
nr=min(M,N);
pvar=cumsum(evals)/sum(evals)*100;
for i=1:nr
if i<=k_rank & strcmp(sig_test,'yes')
fprintf(' ** %d %.3f %6.2f\n',[ i evals(i) pvar(i)])
else
fprintf(' %d %.3f %6.2f\n',[ i evals(i) pvar(i)])
end
end
fprintf('Total %.3f\n',sum(evals(1:nr)))
end
%
% Brian O. Blanton
% Department of Marine Sciences
% Ocean Processes Numerical Modeling Laboratory
% 12-7 Venable Hall
% CB# 3300
% University of North Carolina
% Chapel Hill, NC
% 27599-3300
%
% 919-962-4466
% blanton@marine.unc.edu
%
% Summer 1999
%
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -