?? pca_hao.c
字號:
/*********************************/
/* Principal Components Analysis */
/*********************************/
/*********************************************************************/
/* Principal Components Analysis or the Karhunen-Loeve expansion is a
classical method for dimensionality reduction or exploratory data
analysis. One reference among many is: F. Murtagh and A. Heck,
Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987
(hardbound, paperback and accompanying diskette).
This program is public-domain. If of importance, please reference
the author. Please also send comments of any kind to the author:
F. Murtagh
Schlossgartenweg 1 or 35 St. Helen's Road
D-8045 Ismaning Booterstown, Co. Dublin
W. Germany Ireland
Phone: + 49 89 32006298 (work)
+ 49 89 965307 (home)
Telex: 528 282 22 eo d
Fax: + 49 89 3202362
Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci
Span: esomc1::fionn
Internet: murtagh@scivax.stsci.edu
A Fortran version of this program is also available.
F. Murtagh, Munich, 6 June 1989 */
/*********************************************************************/
#include <stdio.h>
#include <string.h>
#include <math.h>
#define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
main(argc, argv)
int argc;
char *argv[];
{
FILE *stream;
int n, m, i, j, k, k2;
float **data, **matrix(), **symmat, **symmat2, *vector(), *evals, *interm;
void free_matrix(), free_vector(), corcol(), covcol(), scpcol();
void tred2(), tqli();
float in_value;
char option, *strncpy();
/*********************************************************************
Get from command line:
input data file name, #rows, #cols, option.
Open input file: fopen opens the file whose name is stored in the
pointer argv[argc-1]; if unsuccessful, error message is printed to
stderr.
*********************************************************************/
if (argc != 5)
{
printf("Syntax help: PCA filename #rows #cols option\n\n");
printf("(filename -- give full path name,\n");
printf(" #rows \n");
printf(" #cols -- integer values,\n");
printf(" option -- R (recommended) for correlation analysis,\n");
printf(" V for variance/covariance analysis\n");
printf(" S for SSCP analysis.)\n");
exit(1);
}
n = atoi(argv[2]); /* # rows */
m = atoi(argv[3]); /* # columns */
strncpy(&option,argv[4],1); /* Analysis option */
printf("No. of rows: %d, no. of columns: %d.\n",n,m);
printf("Input file: %s.\n",argv[1]);
if ((stream = fopen(argv[1],"r")) == NULL)
{
fprintf(stderr, "Program %s : cannot open file %s\n",
argv[0], argv[1]);
fprintf(stderr, "Exiting to system.");
exit(1);
/* Note: in versions of DOS prior to 3.0, argv[0] contains the
string "C". */
}
/* Now read in data. */
data = matrix(n, m); /* Storage allocation for input data */
for (i = 1; i <= n; i++)
{
for (j = 1; j <= m; j++)
{
fscanf(stream, "%f", &in_value);
data[i][j] = in_value;
}
}
/* Check on (part of) input data.
for (i = 1; i <= 18; i++) {for (j = 1; j <= 8; j++) {
printf("%7.1f", data[i][j]); } printf("\n"); }
*/
symmat = matrix(m, m); /* Allocation of correlation (etc.) matrix */
/* Look at analysis option; branch in accordance with this. */
switch(option)
{
case 'R':
case 'r':
printf("Analysis of correlations chosen.\n");
corcol(data, n, m, symmat);
/* Output correlation matrix.
for (i = 1; i <= m; i++) {
for (j = 1; j <= 8; j++) {
printf("%7.4f", symmat[i][j]); }
printf("\n"); }
*/
break;
case 'V':
case 'v':
printf("Analysis of variances-covariances chosen.\n");
covcol(data, n, m, symmat);
/* Output variance-covariance matrix.
for (i = 1; i <= m; i++) {
for (j = 1; j <= 8; j++) {
printf("%7.1f", symmat[i][j]); }
printf("\n"); }
*/
break;
case 'S':
case 's':
printf("Analysis of sums-of-squares-cross-products");
printf(" matrix chosen.\n");
scpcol(data, n, m, symmat);
/* Output SSCP matrix.
for (i = 1; i <= m; i++) {
for (j = 1; j <= 8; j++) {
printf("%7.1f", symmat[i][j]); }
printf("\n"); }
*/
break;
default:
printf("Option: %s\n",option);
printf("For option, please type R, V, or S\n");
printf("(upper or lower case).\n");
printf("Exiting to system.\n");
exit(1);
break;
}
/*********************************************************************
Eigen-reduction
**********************************************************************/
/* Allocate storage for dummy and new vectors. */
evals = vector(m); /* Storage alloc. for vector of eigenvalues */
interm = vector(m); /* Storage alloc. for 'intermediate' vector */
symmat2 = matrix(m, m); /* Duplicate of correlation (etc.) matrix */
for (i = 1; i <= m; i++)
{
for (j = 1; j <= m; j++)
{
symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
}
}
tred2(symmat, m, evals, interm); /* Triangular decomposition */
tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */
/* evals now contains the eigenvalues,
columns of symmat now contain the associated eigenvectors. */
printf("\nEigenvalues:\n");
for (j = m; j >= 1; j--)
{
printf("%18.5f\n", evals[j]); //特征值
}
printf("\n(Eigenvalues should be strictly positive; limited\n");
printf("precision machine arithmetic may affect this.\n");
printf("Eigenvalues are often expressed as cumulative\n");
printf("percentages, representing the 'percentage variance\n");
printf("explained' by the associated axis or principal component.)\n");
printf("\nEigenvectors:\n");
printf("(First three; their definition in terms of original vbes.)\n");
for (j = 1; j <= m; j++)
{
for (i = 1; i <= 3; i++)
{
printf("%12.4f", symmat[j][m-i+1]);
}
printf("\n");
}
/* Form projections of row-points on first three prin. components. */
/* Store in 'data', overwriting original data. */
for (i = 1; i <= n; i++)
{
for (j = 1; j <= m; j++)
{
interm[j] = data[i][j];
} /* data[i][j] will be overwritten */
for (k = 1; k <= 3; k++)
{
data[i][k] = 0.0;
for (k2 = 1; k2 <= m; k2++)
{
data[i][k] += interm[k2] * symmat[k2][m-k+1];
}
}
}
printf("\nProjections of row-points on first 3 prin. comps.:\n");
for (i = 1; i <= n; i++)
{
for (j = 1; j <= 3; j++)
{
printf("%12.4f", data[i][j]);
}
printf("\n");
}
/* Form projections of col.-points on first three prin. components. */
/* Store in 'symmat2', overwriting what was stored in this. */
for (j = 1; j <= m; j++)
{
for (k = 1; k <= m; k++)
{
interm[k] = symmat2[j][k];
} /*symmat2[j][k] will be overwritten*/
for (i = 1; i <= 3; i++)
{
symmat2[j][i] = 0.0;
for (k2 = 1; k2 <= m; k2++)
{
symmat2[j][i] += interm[k2] * symmat[k2][m-i+1];
}
if (evals[m-i+1] > 0.0005) /* Guard against zero eigenvalue */
symmat2[j][i] /= sqrt(evals[m-i+1]); /* Rescale */
else
symmat2[j][i] = 0.0; /* Standard kludge */
}
}
printf("\nProjections of column-points on first 3 prin. comps.:\n");
for (j = 1; j <= m; j++)
{
for (k = 1; k <= 3; k++)
{
printf("%12.4f", symmat2[j][k]);
}
printf("\n");
}
free_matrix(data, n, m);
free_matrix(symmat, m, m);
free_matrix(symmat2, m, m);
free_vector(evals, m);
free_vector(interm, m);
}
/** Correlation matrix: creation ***********************************/
void corcol(data, n, m, symmat)
float **data, **symmat;
int n, m;
/* Create m * m correlation matrix from given n * m data matrix. */
{
float eps = 0.005;
float x, *mean, *stddev, *vector();
int i, j, j1, j2;
/* Allocate storage for mean and std. dev. vectors */
mean = vector(m);
stddev = vector(m);
/* Determine mean of column vectors of input data matrix */
for (j = 1; j <= m; j++)
{
mean[j] = 0.0;
for (i = 1; i <= n; i++)
{
mean[j] += data[i][j];
}
mean[j] /= (float)n;
}
printf("\nMeans of column vectors:\n");
for (j = 1; j <= m; j++) {
printf("%7.2f",mean[j]); } printf("\n");
/* Determine standard deviations of column vectors of data matrix. */
for (j = 1; j <= m; j++)
{
stddev[j] = 0.0;
for (i = 1; i <= n; i++)
{
stddev[j] += ( ( data[i][j] - mean[j] ) *
( data[i][j] - mean[j] ) );
}
stddev[j] /= (float)n;
stddev[j] = sqrt(stddev[j]);
/* The following in an inelegant but usual way to handle
near-zero std. dev. values, which below would cause a zero-
divide. */
if (stddev[j] <= eps) stddev[j] = 1.0;
}
printf("\nStandard deviations of columns:\n");
for (j = 1; j <= m; j++) { printf("%7.1f", stddev[j]); }
printf("\n");
/* Center and reduce the column vectors. */
for (i = 1; i <= n; i++)
{
for (j = 1; j <= m; j++)
{
data[i][j] -= mean[j];
x = sqrt((float)n);
x *= stddev[j];
data[i][j] /= x;
}
}
/* Calculate the m * m correlation matrix. */
for (j1 = 1; j1 <= m-1; j1++)
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -