https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html  

### 머신러닝
* 지도학습(supervised learning) : 정답이 존재
    * 회귀(regression)
    * 분류(classification)
* 비지도학습(unsupervised learning) : 정답이 없음
    * 그룹화(군집화, clustering)
    * 차원축소(dimensionality reduction)
    * 생성(generation)
* 강화학습(reinforcement learning)

In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from pandas.plotting import scatter_matrix
from sklearn import datasets
from sklearn.cluster import KMeans

In [None]:
# Synthetic demo data (the actual assignment uses the iris set).
# All generator settings are collected in one place; the fixed random_state
# keeps the generated points reproducible between runs.
blob_kwargs = dict(
    n_samples=400,
    n_features=2,
    centers=3,
    cluster_std=2.0,
    shuffle=True,
    random_state=195397,
)
X, y = datasets.make_blobs(**blob_kwargs)

# Cluster on a copy so the generated array X itself is left untouched.
X_train = X.copy()

print("X shape:", X.shape)

# The points are drawn without label colours here; to colour them by the
# generated labels instead, pass c=y together with cmap=plt.cm.brg.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    s=50,
    edgecolor='k',
)
plt.show()

In [None]:
# Fit k-means with k=2 on the training data (2 features, 2 clusters) and
# plot each point coloured by the cluster it is assigned to.
n_clusters = 2
model = KMeans(
    n_clusters=n_clusters,
    max_iter=300,
    tol=1e-4,
    n_init='auto',
    random_state=195397,  # seed: use your own student ID
)

model.fit(X_train)

# Summary of the fitted model: centroids, inertia, and iterations run.
print("Cluster Center : ", model.cluster_centers_)
print("inertia : ", model.inertia_)
print("Iteration : ", model.n_iter_)


# Cluster index predicted for every training point.
pred_train = model.predict(X_train)

plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=pred_train,
    cmap=plt.cm.brg,
    s=50,
    edgecolor='k',
)
# Render the figure explicitly: without this the plot is never shown when the
# code runs as a script, and a notebook cell echoes the PathCollection repr.
plt.show()

In [None]:
# Elbow method: fit k-means once per candidate k and record the inertia.
# The k at which the slope of the curve stops changing much (about 3 here)
# is taken as the best number of clusters.
k_values = range(1, 11)  # single source of truth for the loop and the x-axis

inertias = []
for n_clusters in k_values:
    model = KMeans(
        n_clusters=n_clusters,
        max_iter=300,
        tol=1e-4,
        n_init='auto',
        random_state=195397,
    )

    model.fit(X_train)
    inertias.append(model.inertia_)
# NOTE: after the loop, `n_clusters` and `model` refer to the last fit (k=10).

plt.plot(k_values, inertias, 'o-')
plt.xticks(k_values)  # one tick per integer k so the elbow is easy to read
plt.xlabel("n_clusters (k)")
plt.ylabel("inertia")
plt.grid()
# Render the figure explicitly so it also appears when run as a script.
plt.show()

In [None]:
# Repeat the experiment on the iris dataset.
# These two imports repeat the ones in the notebook's import cell; they are
# kept so this cell reads the same as before.
from pandas.plotting import scatter_matrix
import pandas as pd

iris = datasets.load_iris()
X, y = iris.data, iris.target

# Cluster on a copy so the loaded feature array X is left untouched.
X_train = X.copy()

# Pairwise scatter plots of all features, labelled with the feature names.
# Passing c=pred_train here would colour the points by cluster.
iris_frame = pd.DataFrame(X, columns=iris['feature_names'])
scatter_matrix(iris_frame, s=100, figsize=(10, 10))
plt.show()

In [None]:
# Elbow method: compute the inertia for each candidate number of clusters k.
# The k at which the slope of the curve stops changing much (about 3 here)
# is taken as the best number of clusters.
k_values = range(1, 11)  # single source of truth for the loop and the x-axis

inertias = []
for n_clusters in k_values:
    model = KMeans(
        n_clusters=n_clusters,
        max_iter=300,
        tol=1e-4,
        n_init='auto',
        random_state=195397,
    )

    model.fit(X_train)
    inertias.append(model.inertia_)
# NOTE: after the loop, `n_clusters` and `model` refer to the last fit (k=10).

plt.plot(k_values, inertias, 'o-')
plt.xticks(k_values)  # one tick per integer k so the elbow is easy to read
plt.xlabel("n_clusters (k)")
plt.ylabel("inertia")
plt.grid()
# Render the figure explicitly so it also appears when run as a script.
plt.show()

In [None]:
# Fit k-means with 3 clusters and colour the feature scatter matrix by the
# cluster assigned to each sample.
n_clusters = 3
model = KMeans(
    n_clusters=n_clusters,
    max_iter=300,
    tol=1e-4,
    n_init='auto',
    random_state=195397,  # seed: use your own student ID
).fit(X_train)  # fit() returns the estimator itself

# Report centroids, inertia, and the number of iterations that were run.
for caption, value in (
    ("Cluster Center : ", model.cluster_centers_),
    ("inertia : ", model.inertia_),
    ("Iteration : ", model.n_iter_),
):
    print(caption, value)

pred_train = model.predict(X_train)

# c=pred_train colours every point by its predicted cluster.
feature_frame = pd.DataFrame(X, columns=iris['feature_names'])
scatter_matrix(feature_frame, s=100, figsize=(10, 10), c=pred_train)
plt.show()