In [None]:
# 區分訓練和測試資料集，再利用 Confusion Matrix 來衡量分類模型效力


# 兩個類別的 Confusion Matrix 範例


from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from scipy.stats import mode
from sklearn.metrics import confusion_matrix , classification_report
import numpy


# 1. Generate the data
#
# Two 2-D Gaussian blobs of 100 points each. Every source of randomness in
# this cell is driven by SEED so that "Restart & Run All" reproduces the
# same numbers.

SEED = 42
rng = numpy.random.default_rng(SEED)

# First group: centred at ( 5 , 5 ) with standard deviation 1, labelled 0

x1 = rng.normal(loc=5,scale=1,size=100)
x2 = rng.normal(loc=5,scale=1,size=100)

x1_set = numpy.stack((x1,x2),axis=1)

y1 = numpy.zeros(len(x1_set),dtype=int)

# Second group: centred at ( 10 , 10 ) with standard deviation 2, labelled 1

x3 = rng.normal(loc=10,scale=2,size=100)
x4 = rng.normal(loc=10,scale=2,size=100)

x2_set = numpy.stack((x3,x4),axis=1)

y2 = numpy.ones(len(x2_set),dtype=int)

# Merge both groups, then split into training data (60%) and test data (40%)

x = numpy.concatenate((x1_set,x2_set))
y = numpy.concatenate((y1,y2))

x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.4,random_state=SEED)


# 2. Create the K-means object and train the model
#
# n_init is passed explicitly because its default differs between
# scikit-learn versions; random_state fixes the centroid initialisation.

model = KMeans(n_clusters=2,n_init=10,random_state=SEED)
model.fit(x_train)

print(model.cluster_centers_)


# 3. Relabel the clusters
#
# K-means cluster ids are arbitrary, so each cluster is given the most common
# true label among its TRAINING members ( model.labels_ holds the cluster id
# of every row of x_train ). The test labels are deliberately not used to
# build this mapping, so they cannot leak into the predictions that are
# evaluated below.

y_prediction = model.predict(x_test)

y_map = numpy.zeros_like(y_prediction)

for cluster_id in range(model.n_clusters) :

    y_map[y_prediction == cluster_id] = mode(y_train[model.labels_ == cluster_id])[0]


# 4. Build the confusion matrix from y_test and the relabelled predictions
#    y_map, then derive accuracy, sensitivity (recall) and precision.
#
# Rows of the matrix are true labels and columns are predicted labels.
# The two per-class rates below treat class 0 as the positive class, so they
# correspond to the "0" row of the classification report.

matrix = confusion_matrix(y_test,y_map)

print(matrix)

print(f"accuracy_rate = {( matrix[0,0] + matrix[1,1] ) / ( matrix[0,0] + matrix[0,1] + matrix[1,0] + matrix[1,1] )}")

# Of the samples that truly are class 0, the fraction predicted as class 0
print(f"sensitivity_rate = {( matrix[0,0] ) / ( matrix[0,0] + matrix[0,1] )}")

# Of the samples predicted as class 0, the fraction that truly are class 0
print(f"precision_rate = {( matrix[0,0] ) / ( matrix[0,0] + matrix[1,0] )}")

print(classification_report(y_test,y_map,target_names=["0","1"]))




[[10.00026826 10.18692813]
 [ 4.78915026  4.84410749]]
[[46  0]
 [ 4 30]]
accuracy_rate = 0.95
sensitivity_rate = 1.0
precision_rate = 0.92
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        46
           1       1.00      0.88      0.94        34

    accuracy                           0.95        80
   macro avg       0.96      0.94      0.95        80
weighted avg       0.95      0.95      0.95        80





In [None]:
# 三個以上類別的 Confusion Matrix 範例


# 用鳶尾花資料當範例


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from scipy.stats import mode
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy


# 1. Load the data and split it into training data (60%) and test data (40%)
#
# SEED drives both the split and the K-means initialisation so that
# "Restart & Run All" reproduces the same numbers.

SEED = 42

iris = load_iris()

x = iris["data"]
y = iris["target"]

x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.4,random_state=SEED)


# 2. Train the model
#
# n_init is passed explicitly because its default differs between
# scikit-learn versions.

model = KMeans(n_clusters=3,n_init=10,random_state=SEED)
model.fit(x_train)

y_prediction = model.predict(x_test)


# 3. Re-map the predicted cluster ids to class labels
#
# K-means cluster ids are arbitrary, so each cluster is given the most common
# true label among its TRAINING members ( model.labels_ holds the cluster id
# of every row of x_train ). The test labels are deliberately not used to
# build this mapping, so they cannot leak into the predictions that are
# evaluated below.

y_map = numpy.zeros_like(y_prediction)

for cluster_id in range(model.n_clusters) :

    y_map[y_prediction == cluster_id] = mode(y_train[model.labels_ == cluster_id])[0]


# 4. Produce the confusion matrix and the evaluation metrics

# accuracy : fraction of all samples whose prediction matches the true label

# precision : of the samples predicted as a class, the fraction that truly belong to it

# recall : of the samples that truly belong to a class, the fraction the model found

print(confusion_matrix(y_test,y_map))

print(classification_report(y_test,y_map,target_names=iris["target_names"]))



[[19  0  0]
 [ 0 17  2]
 [ 0  6 16]]
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       0.74      0.89      0.81        19
   virginica       0.89      0.73      0.80        22

    accuracy                           0.87        60
   macro avg       0.88      0.87      0.87        60
weighted avg       0.88      0.87      0.87        60



