/* kmean.c */
/* (Code-viewer page residue: "Font size:") */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of clusters. */
#define K 2
/* Number of numeric attributes (dimensions) stored per record. */
#define D 12
/* Number of records read from the input file. */
#define N 113
/* Size in bytes of each label buffer, including the terminating NUL. */
#define LABEL_SIZE 30
/* Attribute values of every record; filled in by input_data(). */
double records[N][D];
/* Label string of every record; filled in by input_data(). */
char labels[N][LABEL_SIZE];
/*
 * Starting centers, one row of D coordinates per cluster.
 * clustering() copies this table into centers before it begins iterating.
 */
const double init_centers[K][D] = {
{ 10.4923636 ,
53.4163636 ,
20.5090909,
87.7545455 ,
306.8181818 ,
1.7636364 ,
240.4727273 ,
215.6181818 ,
47.1672727 ,
0.2545455 ,
0.2727273 ,
0.1454545 } ,
{8.847931 ,
53.0568966 ,
11.3206897 ,
75.8189655 ,
200.3448276 ,
1.9310345 ,
144.8103448 ,
133.0689655 ,
39.3586207 ,
0.3103448 ,
0.3793103 ,
0.137931}
};
/* Current cluster centers: seeded by clustering(), recomputed by member_to_center(). */
double centers[K][D];
/* member[i] is the index of the cluster that record i is assigned to; set by center_to_member(). */
int member[N];
/* Distinct label strings in order of first appearance in the input; filled in by input_data(). */
char all_labels[K][LABEL_SIZE];
void input_data()
{
int i, j, found;
FILE * file;
char ch;
int label_len = 0;
file = fopen("iris.txt", "r");
for (i = 0; i < N; ++i) {
for (j = 0; j < D; ++j)
fscanf(file, "%lf,", &(records[i][j]));
fscanf(file, "%s", labels[i]);
// printf("%s\n",labels[i]);
found = 0;
for (j = 0; j < label_len; ++j)
if (strcmp(labels[i], all_labels[j]) == 0)
found = 1;
if (!found) {
strcpy(all_labels[label_len], labels[i]);
// printf("%s\n",all_labels[label_len]);
++label_len;
}
}
close(file);
}
/*
 * Squared Euclidean distance between two D-dimensional points.
 *
 * r1, r2: arrays of D coordinates; neither is modified, so both are
 *         const-qualified and rows of const tables can be passed directly.
 * Returns the sum of squared coordinate differences (no square root is taken).
 */
double distance(const double r1[D], const double r2[D])
{
    double result = 0;
    int i;

    for (i = 0; i < D; ++i) {
        const double diff = r1[i] - r2[i];
        result += diff * diff;
    }
    return result;
}
void center_to_member()
{
int i, j;
for (i = 0; i < N; ++i)
{
double min_dist = 1e50;
int min_j = -1;
for (j = 0; j < K; ++j)
{
double curr_dist = distance(records[i], init_centers[j]);
if (curr_dist < min_dist)
{
min_dist = curr_dist;
min_j = j;
}
}
member[i] = min_j;
}
}
/*
 * Update step: move every cluster center to the mean of the records that
 * member[] currently assigns to it.
 *
 * A cluster with no members keeps its previous center; dividing by a zero
 * member count would otherwise turn the center into NaN.
 */
void member_to_center()
{
    int i, j, p;

    for (i = 0; i < K; ++i) {
        double sum[D] = {0};
        int num = 0;

        /* One pass over the records: count members and accumulate all coordinates. */
        for (p = 0; p < N; ++p) {
            if (member[p] != i)
                continue;
            for (j = 0; j < D; ++j)
                sum[j] += records[p][j];
            ++num;
        }

        if (num == 0)
            continue;

        for (j = 0; j < D; ++j)
            centers[i][j] = sum[j] / num;
    }
}
/*
 * Total of distance() between each record and the center of the cluster
 * it is currently assigned to.
 */
double distance_sum()
{
    double total = 0;
    int rec = 0;

    while (rec < N) {
        const int cluster = member[rec];

        total += distance(records[rec], centers[cluster]);
        ++rec;
    }
    return total;
}
/*
 * Run the clustering loop: seed centers from init_centers, then alternate
 * center_to_member() and member_to_center(), printing the distance sum
 * after each round, for as long as that sum keeps strictly decreasing.
 */
void clustering()
{
    double prev_total;
    double total = 1e50;

    /* Both tables are K x D doubles, so one block copy seeds every center. */
    memcpy(centers, init_centers, sizeof centers);

    for (;;) {
        prev_total = total;

        center_to_member();
        member_to_center();

        total = distance_sum();
        printf("curr dist sum = %lf\n", total);

        /* Stop as soon as a round fails to improve on the previous one. */
        if (!(total < prev_total))
            break;
    }
}
/*
 * Write the clustering report to "hw4-2-output.txt".
 *
 * For every cluster the report gives the number of member records, how many
 * of them carry each label in all_labels, and the cluster center.  After the
 * per-cluster sections it lists each record's label and cluster index.
 *
 * If the file cannot be opened, or closing it fails, a diagnostic is printed
 * to stderr; when the open fails nothing is written.
 */
void print_result()
{
    int i, j, num, p;
    int label_count[K];
    FILE *file = fopen("hw4-2-output.txt", "w");

    if (file == NULL) {
        perror("hw4-2-output.txt");
        return;
    }

    for (i = 0; i < K; ++i) {
        for (j = 0; j < K; ++j)
            label_count[j] = 0;

        /* number of points, and per-label tallies, in cluster i */
        num = 0;
        for (j = 0; j < N; ++j) {
            if (member[j] != i)
                continue;
            ++num;
            for (p = 0; p < K; ++p) {
                if (strcmp(labels[j], all_labels[p]) == 0)
                    ++label_count[p];
            }
        }
        fprintf(file, "There are %d points in cluster %d\n", num, i);

        /* counts of labels */
        for (j = 0; j < K; ++j)
            fprintf(file, "%d [%s] ", label_count[j], all_labels[j]);
        fprintf(file, "\n");

        /* cluster center */
        fprintf(file, "Cluster center =");
        for (j = 0; j < D; ++j)
            fprintf(file, " %lf", centers[i][j]);
        fprintf(file, "\n\n");
    }

    /* one line per record: its label and the cluster it ended up in */
    for (i = 0; i < N; ++i)
        fprintf(file, "%s %d\n", labels[i], member[i]);

    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(file) != 0)
        perror("hw4-2-output.txt");
}
/* Program entry point: load the data set, cluster it, then write the report. */
int main()
{
    input_data();    /* fill records / labels / all_labels */
    clustering();    /* compute member[] and centers */
    print_result();  /* write the report file */

    return EXIT_SUCCESS;
}
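/*
 * Keyboard shortcuts (code-viewer page residue, not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */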