-
Notifications
You must be signed in to change notification settings - Fork 0
/
kmeans.m
149 lines (136 loc) · 3.49 KB
/
kmeans.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
%
% Copyright (c) 2018, Vishal_S
% All rights reserved. Please read the "license.txt" for license terms.
%
% Project Title: K-means clustering (As part of BT3041)
%
% Developer: Vishal S
%
% Contact Info: vishalsubbu97@gmail.com
%
clear all
clf
clc
%% Load data
clear all
clf
clc
hold on
filename = 'outlier.txt';
% read from the file
fileID = fopen(filename,'r');
formatSpec = '%f,%f,%f';
sizeA = [3 Inf];
A = fscanf(fileID,formatSpec,sizeA);
fclose(fileID);
%% Variable
no_of_clusters = 4;
convergence = 1.0;
iteration = 1;
% Define variables of appropriate length
max_len = size(A,2);
mean_old = zeros(2,no_of_clusters);
mean_new = zeros(2,no_of_clusters);
cluster_id = zeros(max_len,1);
sum = zeros(2,no_of_clusters);
no_of_points = zeros(1,no_of_clusters);
initial_mean = randi([1 max_len],[no_of_clusters 1]);
%Define stopping criteria
sse_old = 0.0;
convergence_limit = 0.001;
max_iteration = 1000;
%% Kmeans algorithm
% initialising the clusters' means
for i = 1:no_of_clusters
for j = 1:2
mean_old(j,i) = A(j,initial_mean(i));
end
end
while ((convergence > convergence_limit)&&(iteration < max_iteration))
X = ['Iteration = ',num2str(iteration)];
disp(X);
% Assigning each point to a cluster
dist = 0.0;
for i = 1:max_len
dist_x = (A(1,i)-mean_old(1,1))^2;
dist_y = (A(2,i)-mean_old(2,1))^2;
dist = dist_x + dist_y;
index = 1;
for j = 2:no_of_clusters
dist_x = (A(1,i)-mean_old(1,j))^2;
dist_y = (A(2,i)-mean_old(2,j))^2;
if(dist > dist_x + dist_y)
dist = dist_x + dist_y;
index = j;
end
end
cluster_id(i,1) = index;
end
for i = 1:no_of_clusters
sum(1,i) = 0.0;
sum(2,i) = 0.0;
no_of_points(1,i) = 0.0;
end
% Calculating new mean
for i = 1:max_len
sum(1,i) = 0.0;
sum(2,i) = 0.0;
sum(1,cluster_id(i,1)) = sum(1,cluster_id(i,1)) + A(1,i);
sum(2,cluster_id(i,1)) = sum(2,cluster_id(i,1)) + A(2,i);
no_of_points(1,cluster_id(i,1)) =no_of_points(1,cluster_id(i,1)) + 1;
end
% Calculating the new means
for i = 1:no_of_clusters
for j = 1:2
mean_new(j,i) = sum(j,i)/no_of_points(1,i);
end
end
% Caclulating SSE
sse_new = 0.0;
for i = 1:max_len
for j = 1:2
sse_new = sse_new + ( A(j,i)-mean_new(j,cluster_id(i)))^2;
end
end
convergence = abs((sse_new - sse_old)) / sse_new ;
% update the variables
iteration = iteration + 1;
sse_old = sse_new;
mean_old = mean_new;
end
%% Plot results
k=max(cluster_id);
Colors=hsv(k);
Legends = {};
for i=0:k
A_i=A(:,cluster_id==i);
if i~=0
Style = 'x';
MarkerSize = 8;
Color = Colors(i,:);
Legends{end+1} = ['Cluster #' num2str(i)];
end
if ~isempty(A_i)
%scatter3(A_i(1,:),A_i(2,:),A_i(3,:),Style,'MarkerSize',MarkerSize,'MarkerFaceColor',Color);
scatter(A_i(1,:),A_i(2,:),Style,'MarkerFaceColor',Color);
end
hold on;
end
Style = 'o';
MarkerSize = 10;
Color = [0 0 0];
Legends{end+1} = 'Centroids';
scatter(mean_new(1,:),mean_new(2,:),Style,'MarkerFaceColor',Color);
hold off;
grid on;
legend(Legends);
legend('Location', 'NorthEastOutside');
%% Print Results
X = ['SSE =',num2str(sse_new)];
disp(X);
Y = 'Centroids';
disp(Y);
for i = 1:no_of_clusters
Y = [' Centroid #',num2str(i),'=[',num2str(mean_new(1,i)),' ',num2str(mean_new(2,i)),']'];
disp(Y)
end