In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('./Data/train_data.csv', header=None)
label = pd.read_csv('./Data/train_label.csv', header=None)
data.columns = [f'a{i}' for i in data.columns]
label.columns = ['label']

In [3]:
from sklearn.preprocessing import OneHotEncoder

# 創建OneHotEncoder對象
enc = OneHotEncoder()

# 將訓練集和測試集的標籤都用於擬合Encoder
enc.fit(label)

# 將訓練集和測試集的標籤都轉換成One-Hot編碼
label_enc = enc.transform(label).toarray()

# 新增一個column並填入0
unknown_col = np.zeros((label_enc.shape[0], 1))

# 將unknown column插入到最後一欄
label_enc = np.hstack((label_enc, unknown_col))

In [4]:
pd.DataFrame(label_enc)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
288,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
289,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
290,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
291,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# 填補缺失值
data_no_na = np.array(pd.DataFrame(data).fillna(pd.DataFrame(data).mean()))
data_no_na

array([[ 75. ,   0. , 190. , ...,   2.9,  23.3,  49.4],
       [ 56. ,   1. , 165. , ...,   2.1,  20.4,  38.8],
       [ 55. ,   0. , 175. , ...,   2.6,  34.6,  61.6],
       ...,
       [ 54. ,   0. , 160. , ...,  -0.8,  24.4,  18.2],
       [ 62. ,   1. , 157. , ...,  -0.8,  35.7,  28.5],
       [ 40. ,   0. , 170. , ...,   1.5,  24.6,  42. ]])

In [6]:
from sklearn.model_selection import train_test_split

# 分割數據集和標籤為訓練集和測試集
train_data, test_data, train_label, test_label = train_test_split(data_no_na, label_enc, test_size=0.2, random_state=42)


In [7]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_data_scaled = scaler.fit_transform(train_data)
test_data_scaled = scaler.transform(test_data)

In [8]:
pd.DataFrame(train_data_scaled)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,269,270,271,272,273,274,275,276,277,278
0,1.279549,-1.178030,-0.057136,-0.960855,-0.690643,0.368423,-0.080128,0.108400,0.666899,0.357239,...,0.857564,0.521775,-0.138147,0.285652,-0.110883,0.0,0.579841,1.191817,-0.020894,0.738835
1,-1.102007,0.848875,-0.286522,-0.666562,-0.795110,0.474074,0.138621,-0.452787,-0.286421,-0.590991,...,0.638253,0.521775,0.357752,0.913283,-0.110883,0.0,1.225949,0.355921,1.901986,1.384377
2,0.948778,0.848875,-0.188214,-0.372268,-0.690643,-0.397545,1.763616,-0.845618,-0.452216,0.606774,...,-0.458305,0.521775,-0.881995,-0.341978,-0.110883,0.0,-0.712376,-0.479974,-0.788321,-0.816079
3,0.221080,0.848875,-0.352061,2.040936,-0.168308,1.609819,1.669866,1.483310,1.122835,-0.141829,...,0.857564,0.521775,-0.355103,-1.911055,-0.110883,0.0,0.256787,-0.403984,-0.762453,-0.664517
4,0.221080,0.848875,-0.352061,0.039742,-0.795110,-0.503196,0.044872,-0.565025,-0.286421,-0.590991,...,-0.458305,0.521775,0.698682,0.913283,-0.110883,0.0,0.579841,0.051959,0.608569,0.239242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2.073401,-1.178030,0.237788,0.922622,0.771895,0.711788,-0.267627,-0.536965,-0.659460,-0.391364,...,-0.896928,-2.009707,0.016821,0.913283,-0.110883,0.0,1.225949,-1.163889,0.436114,-0.479275
230,-1.962013,-1.178030,-0.024367,-1.137431,1.085296,-0.133418,0.201121,-0.593084,0.501105,0.881262,...,-1.993485,-1.425519,2.093396,-0.734247,-0.110883,0.0,-0.712376,-2.531718,1.151805,-1.394260
231,0.287234,0.848875,-0.352061,-0.372268,-1.212978,0.236360,0.607370,-0.508906,0.293861,-0.091922,...,1.076876,0.521775,-1.036963,0.521014,-0.110883,0.0,-0.066268,-0.100022,-0.607243,-0.484888
232,0.684160,-1.178030,-0.024367,0.275177,0.667428,0.051471,0.669870,-0.144134,0.584002,-0.840526,...,0.418941,-1.230790,-0.758020,0.913283,-0.110883,0.0,0.256787,0.811864,-0.357182,0.362736


In [9]:
train_data_scaled.shape

(234, 279)

In [10]:
train_data_3d = np.reshape(train_data_scaled, (train_data_scaled.shape[0], train_data_scaled.shape[1], 1))
test_data_3d = np.reshape(test_data_scaled, (test_data_scaled.shape[0], test_data_scaled.shape[1], 1))

In [11]:
train_data_3d

array([[[ 1.27954927],
        [-1.17803018],
        [-0.05713642],
        ...,
        [ 1.19181677],
        [-0.02089366],
        [ 0.73883476]],

       [[-1.10200686],
        [ 0.84887469],
        [-0.28652235],
        ...,
        [ 0.3559213 ],
        [ 1.90198642],
        [ 1.38437661]],

       [[ 0.94877758],
        [ 0.84887469],
        [-0.1882141 ],
        ...,
        [-0.47997417],
        [-0.78832113],
        [-0.81607909]],

       ...,

       [[ 0.28723422],
        [ 0.84887469],
        [-0.35206119],
        ...,
        [-0.10002168],
        [-0.60724274],
        [-0.48488805]],

       [[ 0.68416024],
        [-1.17803018],
        [-0.024367  ],
        ...,
        [ 0.81186429],
        [-0.3571821 ],
        [ 0.36273647]],

       [[ 1.47801228],
        [-1.17803018],
        [ 0.13948009],
        ...,
        [ 1.03983578],
        [-0.23646318],
        [ 0.59288617]]])

In [12]:
train_data_3d.shape

(234, 279, 1)

In [13]:
import tensorflow as tf
from tensorflow import keras

In [14]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [15]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('gpu')))

Num GPUs Available:  0


In [16]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Create 2 virtual GPUs with 1GB memory each
  try:
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=1024),
         tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

In [17]:
# 建立模型
from keras.layers import Dropout, BatchNormalization
from keras import regularizers

model = keras.models.Sequential([
    keras.layers.LSTM(48, activation='tanh', return_sequences=False, input_shape=(train_data_3d.shape[1], train_data_3d.shape[2]), kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.1),
    BatchNormalization(),
    keras.layers.Dense(36, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.1),
    BatchNormalization(),
    keras.layers.Dense(24, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.1),
    BatchNormalization(),
    keras.layers.Dense(18, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.1),
    BatchNormalization(),
    keras.layers.Dense(9, activation='softmax', kernel_regularizer=regularizers.l2(0.001))
])

optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 48)                9600      
                                                                 
 dropout (Dropout)           (None, 48)                0         
                                                                 
 batch_normalization (BatchN  (None, 48)               192       
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 36)                1764      
                                                                 
 dropout_1 (Dropout)         (None, 36)                0         
                                                                 
 batch_normalization_1 (Batc  (None, 36)               144       
 hNormalization)                                        

In [18]:
from sklearn.model_selection import KFold

# 定義交叉驗證參數
n_splits = 5 # 設置5折交叉驗證
fold = KFold(n_splits=n_splits, shuffle=True, random_state=42)

In [19]:
# 增加 callbacks
early_stopping = keras.callbacks.EarlyStopping(start_from_epoch=150 ,patience=50, restore_best_weights=True)

# 進行交叉驗證
scores = []
for train_index, test_index in fold.split(data):
    # 將數據分為訓練集和測試集
    X_train, X_test = data_no_na[train_index], data_no_na[test_index]
    y_train, y_test = label_enc[train_index], label_enc[test_index]

    # 標準化
    train_data_scaled = scaler.fit_transform(X_train)
    test_data_scaled = scaler.transform(X_test)

    train_data_3d = np.reshape(train_data_scaled, (train_data_scaled.shape[0], train_data_scaled.shape[1], 1))
    test_data_3d = np.reshape(test_data_scaled, (test_data_scaled.shape[0], test_data_scaled.shape[1], 1))

     # 編譯模型
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # 訓練模型
    model.fit(train_data_3d, y_train, epochs=500, batch_size=64, validation_data=(test_data_3d, y_test), callbacks=[early_stopping])

    # 評估模型
    score = model.evaluate(test_data_3d, y_test, verbose=0)
    scores.append(score[1])

# 輸出交叉驗證結果
print("Cross-validation accuracy: %.2f%% (+/- %.2f%%)" % (np.mean(scores) * 100, np.std(scores) * 100))

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [20]:
scores

[0.7288135886192322,
 0.8644067645072937,
 0.7457627058029175,
 0.8275862336158752,
 0.8793103694915771]

In [28]:
import datetime

# 獲取當前日期和時間
now = datetime.datetime.now()

# 格式化日期和時間，並作為模型名稱
model_name = f"Arrhythmia_{now.strftime('%Y-%m-%d_%H-%M-%S')}"

# 儲存模型
model.save(f"{model_name}.h5")