In [169]:
import pandas as pd
import numpy as np

import os

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import LSTM, TimeDistributed, ConvLSTM2D
from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import confusion_matrix,accuracy_score
from keras import backend as k

In [170]:
train_data_path = os.getcwd() + "/dataset/total_train.csv"
test_data_path = os.getcwd() + "/dataset/total_test.csv"

winLen = int(1*64)

In [171]:
def getTimeAndDF(path:str):
    """get dataset and dataset's time list
       some dataset's time is not continued, so get the time is for split windows

    Args:
        path (str): path of the dataset(for .csv)

    Returns:
        time (list): the list of dataset time
        df (np.array): dataset, columns include ["A_F", "A_V", "A_L", "Action"]
    """
    df = pd.read_csv(path)
    time = list(df['time'])
    action = list(df['Action'])
    df = df[["A_F", "A_V", "A_L"]].values

    return time, df, action

In [172]:
trainTime, trainData, trainAction = getTimeAndDF(train_data_path)
testTime, testData, testAction = getTimeAndDF(test_data_path)

In [173]:
trainData = (trainData-trainData.mean())/(trainData.std())
testData = (testData-testData.mean())/(testData.std())

In [174]:
def getTotalWindows(indices:list, windowSize:int , gap: float):
    """because
    check every part of time

    Args:
        indices (list): _description_
        windowSize (int): _description_
        gap (float): _description_

    Returns:
        _type_: _description_
    """
    groups = []
    group_count = 0
    temp = []
    lenOfGroup = []
    length_count = 0
    for i in range(len(indices)):
        if i == (len(indices) - 1):
            temp.append(indices[i])
            length_count = length_count + 1

            groups.append(temp)
            lenOfGroup.append(length_count)
            length_count = 0
            temp = []
            break
        temp.append(indices[i])
        length_count = length_count + 1
        if (indices[i+1] - gap > indices[i]):
            group_count = group_count + 1

            lenOfGroup.append(length_count)
            length_count = 0

            groups.append(temp)
            temp = []

    countOfUndivisible = 0
    totalWindows = 0

    stop_Indexs = []
    stop_Index = -windowSize


    for lengths in lenOfGroup:
        stop_Index = stop_Index + lengths
        stop_Indexs.append(stop_Index)
        totalWindows = totalWindows + int(float(lengths/windowSize)*2 -1)
        
        if lengths % (windowSize/2) != 0:
            countOfUndivisible = countOfUndivisible + 1
            print(lengths)

    return totalWindows, stop_Indexs

In [175]:
trainWindows, trainStop = getTotalWindows(trainTime, winLen, 20) 
testWindows, testStop = getTotalWindows(testTime, winLen, 20)
del trainTime, testTime

In [176]:
def XySplit(dataset:np.array, windows:int, length:int, stop:list, action:list):
    """split dataset into X and y, 
    X is 2D array, size of X is [windows, 64*3]
    y is 1D array, size of y is [windows, 1]

    Args:
        dataset (np.array): dataset
        windows (int): total windows that get from getTotalWindows()
        length (int): length of a piece of data, here is 3
        stop (list): stopList that get from getTotalWindows()
        action (list): action list

    Returns:
        X(np.array): X is 2D array, size of X is [windows, 64*3]
        y(np.array): y is 1D array, size of y is [windows, 1]
    """
    X = np.empty((windows, winLen*(length)))
    y = np.empty((windows, 1))

    stopIndex = 0
    windowCount = 0
    for win in range(windows):
        for i in range(winLen):
            if i == 0:
                y[win] = action[int(windowCount*winLen)]

            if int(windowCount*winLen)<len(dataset)-winLen-1:
                for data in range(length):
                    X[win, i*(length)+data] = dataset[int(windowCount*winLen) + i, data]

            if stopIndex < len(stop):
                if int(windowCount*winLen) == stop[stopIndex]:
                    windowCount += 0.5
                    stopIndex += 1
            
            if win == windows-1:
                for data in range(length):
                    X[win, i*(length) + data] = dataset[int((windowCount-0.5)*winLen) + i, data]
        
        windowCount += 0.5

    return X, y

In [177]:
X, y = XySplit(trainData, trainWindows, 3, trainStop, trainAction)
testX, testy = XySplit(testData, testWindows, 3, testStop, testAction)
del trainData, trainWindows, testData, testWindows, trainStop, trainAction, testStop, testAction

In [178]:
unique, counts = np.unique(y, return_counts=True)
class_weight = {0:(1/counts[0])*len(y)/2, 1:(1/counts[1])*len(y)/2, 2:(1/counts[2])*len(y)/2}

In [179]:
skf = StratifiedKFold(n_splits=10, shuffle = True, random_state=42)
skf.get_n_splits(X, y)

10

In [180]:
def to_3Darray(array):
    """_summary_

    Args:
        array (_type_): _description_

    Returns:
        _type_: _description_
    """
    arr_3d = np.empty((len(array), winLen, 3))
    arr_3d = np.reshape(array, (len(array), winLen, 3))
    return arr_3d

In [181]:
scores = []
scores_in_fold = []
losses = []
scores_outside_fold = []

In [182]:
for i, (train_index, val_index) in enumerate(skf.split(X, y)):
    print("==> Fold #%d" % i)
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    X_train = to_3Darray(X_train)
    y_train = to_categorical(y_train)

    X_val = to_3Darray(X_val)
    y_val = to_categorical(y_val)


    
    verbose, epochs, batch_size = 0, 50, 64
    n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train.shape[1]

    n_steps, n_length = 2, 32
    X_train = X_train.reshape((X_train.shape[0], n_steps, n_length, n_features))
    X_val = X_val.reshape((X_val.shape[0], n_steps, n_length, n_features))


    model = Sequential()
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'),
                            input_shape=(None, n_length, n_features)))
                            
    model.add(TimeDistributed(
        Conv1D(filters=64, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(Dropout(0.5)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(100))
    model.add(Dropout(0.5))
    hunderdOutput = Dense(100, activation='relu')
    model.add(hunderdOutput)  # feature
    # 試著輸出長度為100的向量(feature) 並絳維 看他的分布有無分開
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='adam', metrics=['accuracy']) #可能可以調weighting
    
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose, class_weight = class_weight)
    
    loss, accuracy = model.evaluate(X_val, y_val, batch_size=batch_size, verbose=0)
    

    y_pred = (model.predict(X_val) > 0.5).astype("int32")
    y_val = np.argmax(y_val, axis=1)
    y_pred = np.argmax(y_pred, axis=1)

    confus = confusion_matrix(y_val, y_pred, labels=None, sample_weight=None)
    tp = confus[1][1]
    tn = confus[0][0] + confus[0][2] + confus[2][0] + confus[2][2]
    fp = confus[1][0] + confus[1][2]
    fn = confus[0][1] + confus[2][1]

    precision = (tp/(tp + fp))*100
    recall =  (tp / (tp + fn))*100   #sensitivity
    F1_score = ((2*tp) / (2*tp + fp + fn))*100

    score = accuracy
    losses.append(loss)
    
    score = score * 100.0
    scores_in_fold.append(score)
    
    print('Loss: %.3f%% Accuracy: %.3f%% ' % (loss, score))
    print('precision: %.3f%% recall: %.3f%% F1 score: %.3f%%' % (precision, recall, F1_score))
    # print(confus)


    
    '''score = evaluate_model(X_train, y_train, X_val, y_val)
    score = score * 100.0
    print(score)
    scores.append(score)'''

    

==> Fold #0
Loss: 0.429% Accuracy: 82.230% 
precision: 91.135% recall: 34.777% F1 score: 50.343%
==> Fold #1
Loss: 0.317% Accuracy: 87.708% 
precision: 87.943% recall: 45.338% F1 score: 59.831%
==> Fold #2
Loss: 0.316% Accuracy: 87.186% 
precision: 87.943% recall: 43.206% F1 score: 57.944%
==> Fold #3
Loss: 0.361% Accuracy: 87.643% 
precision: 86.525% recall: 43.964% F1 score: 58.303%
==> Fold #4
Loss: 0.335% Accuracy: 86.860% 
precision: 84.452% recall: 41.349% F1 score: 55.517%
==> Fold #5
Loss: 0.319% Accuracy: 89.077% 
precision: 85.159% recall: 46.796% F1 score: 60.401%
==> Fold #6
Loss: 0.311% Accuracy: 87.382% 
precision: 83.746% recall: 42.021% F1 score: 55.962%
==> Fold #7
Loss: 0.470% Accuracy: 83.534% 
precision: 86.926% recall: 36.123% F1 score: 51.037%
==> Fold #8
Loss: 0.413% Accuracy: 82.100% 
precision: 90.813% recall: 36.925% F1 score: 52.503%
==> Fold #9
Loss: 0.343% Accuracy: 87.410% 
precision: 86.525% recall: 42.361% F1 score: 56.876%


In [183]:
m, s = np.mean(scores_in_fold), np.std(scores_in_fold)
print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))
m, s = np.mean(losses), np.std(losses)
print('Loss: %.3f%% (+/-%.3f)' % (m, s))

Accuracy: 86.113% (+/-2.377)
Loss: 0.361% (+/-0.053)


In [184]:
testX = to_3Darray(testX)
testX = testX.reshape((testX.shape[0], n_steps, n_length, n_features))

In [185]:
testy = to_categorical(testy)
y_pred = (model.predict(testX) > 0.5).astype("int32")
testy = np.argmax(testy, axis=1)
y_pred = np.argmax(y_pred, axis=1)

confus = confusion_matrix(testy, y_pred, labels=None, sample_weight=None)

In [186]:
print(confus)

[[2730  142    2]
 [ 436   86    1]
 [  24   11    0]]


In [187]:
tp = confus[1][1]
tn = confus[0][0] + confus[0][2] + confus[2][0] + confus[2][2]
fp = confus[0][1] + confus[2][1]
fn = confus[1][0] + confus[1][2]

precision = (tp/(tp + fp))*100
sensitivity = (tp / (tp + fn))*100  # sensitivity
specificity = (tn/(tn + fp))*100
F1_score = ((2*tp) / (2*tp + fp + fn))*100

In [188]:
accuracy = accuracy_score(testy, y_pred)*100

In [189]:
accuracy = accuracy
print('Accuracy: %.3f%% ' % (accuracy))
print('FOG: specificity: %.3f%% sensitivity: %.3f%% F1 score: %.3f%%' % (specificity, sensitivity, F1_score))

Accuracy: 82.051% 
FOG: specificity: 94.740% sensitivity: 16.444% F1 score: 22.572%


In [190]:
tp = confus[2][2]
tn = confus[0][0] + confus[0][1] + confus[1][0] + confus[1][1]
fp = confus[0][2] + confus[1][2]
fn = confus[2][0] + confus[2][1]

precision = (tp/(tp + fp))*100
sensitivity = (tp / (tp + fn))*100  # sensitivity
specificity = (tn/(tn + fp))*100
F1_score = ((2*tp) / (2*tp + fp + fn))*100
print('PreFOG: specificity: %.3f%% sensitivity: %.3f%% F1 score: %.3f%%' % (specificity, sensitivity, F1_score))

PreFOG: specificity: 99.912% sensitivity: 0.000% F1 score: 0.000%


In [191]:
from keras.models import load_model

In [192]:
model.save('my_model.h5')