In [32]:
import pandas

data=pandas.read_csv("heart.csv")
"""
年齡 (Age): 患者的年齡
性別 (Sex): 患者的性別
運動誘發型心絞痛 (exng): 運動引起的心絞痛 (1 = 是; 0 = 否) 
主要血管數量 (ca): 主要血管的數量 (0-3 條)
胸痛類型 (cp): 胸痛的類型
值 1: 典型心絞痛
值 2: 非典型心絞痛
值 3: 非心絞痛
值 4: 無症狀
靜息血壓 (trtbps): 靜息時的血壓（以 mm Hg 為單位）
膽固醇 (chol): 透過 BMI 感測器獲取的膽固醇數值（單位 mg/dl）
空腹血糖 (fbs): 空腹血糖是否大於 120 mg/dl (1 = 是; 0 = 否)
靜息心電圖結果 (restecg): 靜息心電圖的結果
值 0: 正常
值 1: 出現 ST-T 波異常（T 波倒置和/或 ST 段升高或下降超過 0.05 mV）
值 2: 符合 Estes 標準，顯示可能或確定的左心室肥大
最大心率 (thalach): 達到的最大心率
目標 (target): 0 = 心臟病發作的機率較低; 1 = 心臟病發作的機率較高
"""
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [33]:
X = data.iloc[:, 1:-1].values #輸入進去的數據
y = data.iloc[:, -1].values #結果

# 分割測試集
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# 標準化
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
y_true = sc.transform(X_test)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error
from scipy.stats import mode
import numpy as np

def align_kmeans_labels(y_true, y_pred):
    """對齊 kmeans 的分群標籤與真實類別"""
    labels = np.zeros_like(y_pred)
    for i in np.unique(y_pred):
        mask = y_pred == i
        labels[mask] = mode(y_true[mask], keepdims=False).mode
    return labels

def models(X_train, y_train, X_test, y_test):
    # 1. 決策樹
    tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
    tree.fit(X_train, y_train)
    y_tree_pred = tree.predict(X_test)
    acc_tree = accuracy_score(y_test, y_tree_pred)

    # 2. KMeans 分群
    kmeans = KMeans(n_clusters=2, random_state=0)
    kmeans.fit(X_train)
    y_kmeans_pred = kmeans.predict(X_test)
    y_kmeans_aligned = align_kmeans_labels(y_test, y_kmeans_pred)
    acc_kmeans = accuracy_score(y_test, y_kmeans_aligned)

    # 3. 線性回歸
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    y_reg_pred = reg.predict(X_test)
    r2 = r2_score(y_test, y_reg_pred)
    mse = mean_squared_error(y_test, y_reg_pred)

    print(f" 決策樹準確率: {acc_tree:.4f}")
    print(f" KMeans 準確率（對齊後）: {acc_kmeans:.4f}")
    print(f" 線性回歸 R²: {r2:.4f}")
    print(f" 線性回歸 MSE: {mse:.4f}")

    return tree, kmeans, reg

In [35]:
model = models(X_train, y_train, X_test, y_test)

 決策樹準確率: 0.4426
 KMeans 準確率（對齊後）: 0.5574
 線性回歸 R²: -18.3345
 線性回歸 MSE: 4.7700
