# DrawKmeansProcess.py
import seaborn as sns
import scipy as sp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# parameter 'data' : numpy array OR pd.DataFrame OR python list
def kmeans_process_2d(data, n_clusters, palette=None):
    """Run k-means on 2-D points, showing a scatter plot after every step.

    The initial centroids are ``n_clusters`` rows sampled at random from
    ``data``. The function then alternates between (1) assigning every point
    to its nearest centroid by Euclidean distance and (2) moving each centroid
    to the mean of the points assigned to it. A figure is displayed after each
    of the two steps, and the loop stops once the recomputed centroids are
    identical to the previous ones.

    Parameters
    ----------
    data : numpy array, pandas.DataFrame or list
        Two-column collection of points; the first column is treated as x and
        the second as y. The caller's object is not modified.
    n_clusters : int
        Number of rows sampled from ``data`` as initial centroids.
    palette : optional
        Forwarded unchanged to ``seaborn.scatterplot`` as its ``palette``
        argument when drawing the clustered points.

    Returns
    -------
    None
        Results are only shown as figures and progress messages.

    Raises
    ------
    ValueError
        If ``data`` does not have exactly two columns.
    """
    # Imported explicitly: `import scipy as sp` alone does not guarantee that
    # the `sp.spatial` submodule has been loaded on every SciPy version.
    from scipy.spatial.distance import cdist

    # reset_index() returns a new frame, so relabelling the columns below
    # never touches a DataFrame owned by the caller.
    points = pd.DataFrame(data).reset_index(drop=True)
    if points.shape[1] != 2:
        raise ValueError(
            "data must have exactly 2 columns (x, y), got {}".format(points.shape[1])
        )
    points.columns = ['x', 'y']

    def mark_centroids(centers):
        # Overlay the centroids as black diamonds on the current axes.
        plt.scatter(centers['x'], centers['y'], marker='D', c='black')
        plt.title('k-means algorithm', fontsize=15)

    def show_clusters(labelled, centers):
        # Display the points coloured by cluster together with the centroids.
        plt.figure()
        sns.scatterplot(x="x", y="y", hue="cluster", data=labelled,
                        palette=palette, legend=False)
        mark_centroids(centers)
        plt.show()

    # random centroid (sorted by x so centroid order is comparable between steps)
    centroids = points.sample(n_clusters).sort_values('x').reset_index(drop=True)

    # random centroid - scatter
    print("\nrandom centroid")
    plt.subplots()
    sns.scatterplot(x='x', y='y', data=points)
    mark_centroids(centroids)
    plt.show()

    while True:
        # reassign data: each point gets the index of its nearest centroid
        distances = cdist(points.to_numpy(), centroids.to_numpy(), "euclidean")
        labelled = points.copy()
        labelled["cluster"] = np.argmin(distances, axis=1)

        # reassign data - scatter
        print("\nreassign data")
        show_clusters(labelled, centroids)

        # reassign centroid: mean of every cluster, again sorted by x
        updated = (
            labelled.groupby("cluster").mean()
            .loc[:, ['x', 'y']]
            .sort_values('x')
            .reset_index(drop=True)
        )

        # Converged when nothing moved. A shape mismatch (a cluster that lost
        # all of its points) simply counts as "changed".
        if np.array_equal(updated.to_numpy(), centroids.to_numpy()):
            break
        centroids = updated

        # reassign centroid - scatter
        print("\nreassign centroid")
        show_clusters(labelled, centroids)

    print("\ndone")
def gif_kmeans(data, n_clusters, palette=None, frame=1000):
    """Run k-means on 2-D points and save the process as ``k-means.gif``.

    The algorithm is the same as the interactive variant: sample
    ``n_clusters`` rows as initial centroids, then alternate between assigning
    points to the nearest centroid (Euclidean distance) and moving centroids
    to the mean of their points until the centroids stop changing. Every step
    is rendered to a numbered PNG inside a temporary ``kmeans`` directory; the
    PNGs are then combined into ``k-means.gif`` in the current working
    directory, and the temporary files and directory are removed.

    Parameters
    ----------
    data : numpy array, pandas.DataFrame or list
        Two-column collection of points; the first column is treated as x and
        the second as y. The caller's object is not modified.
    n_clusters : int
        Number of rows sampled from ``data`` as initial centroids.
    palette : optional
        Forwarded unchanged to ``seaborn.scatterplot`` as its ``palette``
        argument when drawing the clustered points.
    frame : int, default 1000
        Passed to Pillow as the GIF ``duration`` option (frame switching
        speed).

    Returns
    -------
    None
        A message is printed on success, and also when the ``kmeans`` working
        directory cannot be prepared (in which case nothing else is done).

    Raises
    ------
    ValueError
        If ``data`` does not have exactly two columns.
    """
    import os
    from PIL import Image
    # Imported explicitly: `import scipy as sp` alone does not guarantee that
    # the `sp.spatial` submodule has been loaded on every SciPy version.
    from scipy.spatial.distance import cdist

    # reset_index() returns a new frame, so relabelling the columns below
    # never touches a DataFrame owned by the caller.
    points = pd.DataFrame(data).reset_index(drop=True)
    if points.shape[1] != 2:
        raise ValueError(
            "data must have exactly 2 columns (x, y), got {}".format(points.shape[1])
        )
    points.columns = ['x', 'y']

    frame_dir = 'kmeans'

    # makedirs: an existing *empty* 'kmeans' directory is replaced; a
    # non-empty one makes os.rmdir fail and is reported instead of touched.
    try:
        if os.path.exists(frame_dir):
            os.rmdir(frame_dir)
            print("existing directory 'kmeans' removed now before making dir 'kmeans'")
        os.makedirs(frame_dir)
    except OSError:
        print("Error: can't makedirs ./kmeans \n",
              "if your existing directory 'kmeans' has any files, this ERROR can be pop up")
        return None

    frame_paths = []

    def save_frame(fig):
        # Write the figure as the next numbered PNG, then release the figure.
        # Paths are built explicitly instead of os.chdir(), so the process
        # working directory is never changed.
        path = os.path.join(frame_dir, "{0:05d}.png".format(len(frame_paths)))
        frame_paths.append(path)  # recorded first so cleanup covers partial files
        try:
            fig.savefig(path)
        finally:
            plt.close(fig)

    def mark_centroids(centers):
        # Overlay the centroids as black diamonds on the current axes.
        plt.scatter(centers['x'], centers['y'], marker='D', c='black')
        plt.title('k-means algorithm', fontsize=15)

    def draw_clusters(labelled, centers):
        # Build a figure of the points coloured by cluster plus the centroids.
        fig = plt.figure()
        sns.scatterplot(x="x", y="y", hue="cluster", data=labelled,
                        palette=palette, legend=False)
        mark_centroids(centers)
        return fig

    try:
        # random centroid (sorted by x so centroid order is comparable between steps)
        centroids = points.sample(n_clusters).sort_values('x').reset_index(drop=True)

        # random centroid - scatter
        fig = plt.figure()
        sns.scatterplot(x='x', y='y', data=points)
        mark_centroids(centroids)
        save_frame(fig)

        while True:
            # reassign data: each point gets the index of its nearest centroid
            distances = cdist(points.to_numpy(), centroids.to_numpy(), "euclidean")
            labelled = points.copy()
            labelled["cluster"] = np.argmin(distances, axis=1)

            # reassign data - scatter
            save_frame(draw_clusters(labelled, centroids))

            # reassign centroid: mean of every cluster, again sorted by x
            updated = (
                labelled.groupby("cluster").mean()
                .loc[:, ['x', 'y']]
                .sort_values('x')
                .reset_index(drop=True)
            )

            # Converged when nothing moved. A shape mismatch (a cluster that
            # lost all of its points) simply counts as "changed".
            if np.array_equal(updated.to_numpy(), centroids.to_numpy()):
                break
            centroids = updated

            # Progress output: number of the iteration being rendered.
            print(len(frame_paths) // 2)

            # reassign centroid - scatter
            save_frame(draw_clusters(labelled, centroids))

        # start to make gif file
        # Each PNG is copied into memory and its file closed right away, so no
        # handle is still open when the temporary files are deleted below.
        images = []
        for path in frame_paths:
            with Image.open(path) as img:
                images.append(img.copy())

        images[0].save('k-means.gif', save_all=True, append_images=images[1:],
                       loop=0xff, duration=frame)  # duration: frame switching speed
    finally:
        # delete tmp files, even when rendering or GIF assembly failed
        for path in frame_paths:
            if os.path.exists(path):
                os.remove(path)
        os.rmdir(frame_dir)

    print('done : save success')
    return None