In [71]:
import pandas as pd
import numpy as np
import math

import os
import datetime, time

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import LSTM, TimeDistributed, ConvLSTM2D
from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import confusion_matrix,accuracy_score
from keras import backend as k

In [72]:
train_data_path = os.getcwd() + "/dataset/total_train.csv"
test_data_path = os.getcwd() + "/dataset/total_test.csv"
new_data_path = os.getcwd() + "/dataset/merge.csv"

fromPath = os.getcwd() +"/dataset/windows"
savePath = os.getcwd() +"/dataset"

testSubs = ["S01", "S02", "S03", "S05", "S06", "S07", "S08", "S09"]
winLen = int(1*64)

In [73]:
def create_window(window_length,dataframe):

  indices = list(dataframe.index)
  time = []
  time_count = 0
  for j in indices:
    time.append(dataframe.loc[j, 'time'])


  # indices記錄所有Action==act的index
  groups = [] # 用來暫存一組(同action)資料的, 型態是[][]
  temp = [] # 用來暫存一行資料的
  group_count = 0
  for i in range(len(indices)):
    if i == len(indices)-1:
      temp.append(indices[i])
      groups.append(temp)
      temp = []
      break # 如果i已經來到最後的話就break
    temp.append(indices[i])
    #time_count = time_count + 1
    if time[i+1]-16 > time[i]: #如果下個index不是連續的話, 就將前面這些存成第一組
      group_count+=1
      #time_count = time_count + 1
      groups.append(temp)
      temp = []

  #print(groups)

  fs = 64
  # window_length = 1
  # window_length = int(window_length*fs)

  final_dataframe = pd.DataFrame()
  sumOfAct=0

  for i in groups: # group[][]的每一行i
    required = math.floor(len(i)/int(window_length/2))

    
    sumOfAct= sumOfAct+required

    req_index = i[0:(required*int(window_length/2))]

    #print(req_index)
    # concat([要結合的data集合], axis=0是方向為直的)
    final_dataframe = pd.concat([final_dataframe,dataframe.iloc[req_index,:]],axis = 0)
  
  

  return final_dataframe

In [74]:
def saveDFtocsv(fromPath: str, toPath: str, compare:str, useOriginal=True, saveName = ""):
    """_summary_

    Args:
        fromPath (str): _description_
        toPath (str): _description_
        compare (str): _description_
    """
    
    for file in os.listdir(fromPath):
        if compare not in file:
            continue

        filePath = fromPath + '/' + file
        dataset = create_window(winLen, pd.read_csv(filePath))

        if useOriginal:
            savePath = toPath + "/" + file
        else:
            savePath = toPath + "/" + saveName + ".csv"
        dataset.to_csv(savePath, index=False)

In [75]:
def makeDataset(fromPath:str, savePath:str, testSub:str):
    train_patients_dataset = pd.DataFrame()
    test_patients_dataset = pd.DataFrame()

    for file in os.listdir(fromPath):
        if "win_" not in file:
            continue
        
        filePath = fromPath + '/' + file
        dataset = pd.read_csv(filePath)

        if testSub in file:
            test_patients_dataset = test_patients_dataset.append(dataset)
        else:    
            train_patients_dataset = train_patients_dataset.append(dataset)

    to_path = savePath + "/not_windowed_train.csv"
    train_patients_dataset.to_csv(to_path, index=False)

    to_path = savePath + "/not_windowed_test.csv"
    test_patients_dataset.to_csv(to_path, index=False)

In [76]:
def getTimeAndDF(path:str):
    """get dataset and dataset's time list
       some dataset's time is not continued, so get the time is for split windows

    Args:
        path (str): path of the dataset(for .csv)

    Returns:
        time (list): the list of dataset time
        df (np.array): dataset, columns include ["A_F", "A_V", "A_L", "Action"]
    """
    df = pd.read_csv(path)
    time = list(df['time'])
    action = list(df['Action'])
    df = df[["A_F", "A_V", "A_L"]].values

    return time, df, action

In [77]:
def getTotalWindows(indices:list, windowSize:int , gap: float):
    """because
    check every part of time

    Args:
        indices (list): _description_
        windowSize (int): _description_
        gap (float): _description_

    Returns:
        _type_: _description_
    """
    groups = []
    group_count = 0
    temp = []
    lenOfGroup = []
    length_count = 0
    for i in range(len(indices)):
        if i == (len(indices) - 1):
            temp.append(indices[i])
            length_count = length_count + 1

            groups.append(temp)
            lenOfGroup.append(length_count)
            length_count = 0
            temp = []
            break
        temp.append(indices[i])
        length_count = length_count + 1
        if (indices[i+1] - gap > indices[i]):
            group_count = group_count + 1

            lenOfGroup.append(length_count)
            length_count = 0

            groups.append(temp)
            temp = []

    countOfUndivisible = 0
    totalWindows = 0

    stop_Indexs = []
    stop_Index = -windowSize


    for lengths in lenOfGroup:
        stop_Index = stop_Index + lengths
        stop_Indexs.append(stop_Index)
        totalWindows = totalWindows + int(float(lengths/windowSize)*2 -1)
        
        if lengths % (windowSize/2) != 0:
            countOfUndivisible = countOfUndivisible + 1
            print(lengths)

    return totalWindows, stop_Indexs

In [78]:
def XySplit(dataset:np.array, windows:int, length:int, stop:list, action:list):
    """split dataset into X and y, 
    X is 2D array, size of X is [windows, 64*3]
    y is 1D array, size of y is [windows, 1]

    Args:
        dataset (np.array): dataset
        windows (int): total windows that get from getTotalWindows()
        length (int): length of a piece of data, here is 3
        stop (list): stopList that get from getTotalWindows()
        action (list): action list

    Returns:
        X(np.array): X is 2D array, size of X is [windows, 64*3]
        y(np.array): y is 1D array, size of y is [windows, 1]
    """
    X = np.empty((windows, winLen*(length)))
    y = np.empty((windows, 1))

    stopIndex = 0
    windowCount = 0
    for win in range(windows):
        for i in range(winLen):
            if i == 0:
                y[win] = action[int(windowCount*winLen + winLen/2)]

            if int(windowCount*winLen)<len(dataset)-winLen-1:
                for data in range(length):
                    X[win, i*(length)+data] = dataset[int(windowCount*winLen) + i, data]

            if stopIndex < len(stop):
                if int(windowCount*winLen) == stop[stopIndex]:
                    windowCount += 0.5
                    stopIndex += 1
            
            if win == windows-1:
                for data in range(length):
                    X[win, i*(length) + data] = dataset[int((windowCount-0.5)*winLen) + i, data]
        
        windowCount += 0.5

    return X, y

In [79]:
def to_3Darray(array):
    """_summary_

    Args:
        array (_type_): _description_

    Returns:
        _type_: _description_
    """
    arr_3d = np.empty((len(array), winLen, 3))
    arr_3d = np.reshape(array, (len(array), winLen, 3))
    return arr_3d

In [80]:
def setModel(n_length, n_features):
    model = Sequential()
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3,
              activation='relu'), input_shape=(None, n_length, n_features)))

    model.add(TimeDistributed(
        Conv1D(filters=64, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(Dropout(0.5)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(100))
    model.add(Dropout(0.5))
    hunderdOutput = Dense(100, activation='relu')
    model.add(hunderdOutput)  # feature
    # 試著輸出長度為100的向量(feature) 並絳維 看他的分布有無分開
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [81]:
def trainingModel(skf, X, y, parameters, class_weight):
    losses = []
    scores_in_fold = []
    for i, (train_index, val_index) in enumerate(skf.split(X, y)):
        print("==> Fold #%d" % i)
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        X_train = to_3Darray(X_train)
        y_train = to_categorical(y_train)

        X_val = to_3Darray(X_val)
        y_val = to_categorical(y_val)


        
        verbose, epochs, batch_size = 0, parameters[0], parameters[1]
        n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train.shape[1]

        n_steps, n_length = parameters[2], parameters[3]
        X_train = X_train.reshape((X_train.shape[0], n_steps, n_length, n_features))
        X_val = X_val.reshape((X_val.shape[0], n_steps, n_length, n_features))


        model = setModel(n_length, n_features)
        
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose, class_weight = class_weight)
        loss, accuracy = model.evaluate(X_val, y_val, batch_size=batch_size, verbose=0)
        

        y_pred = (model.predict(X_val) > 0.5).astype("int32")
        y_val = np.argmax(y_val, axis=1)
        y_pred = np.argmax(y_pred, axis=1)

        confus = confusion_matrix(y_val, y_pred, labels=None, sample_weight=None)
        tp = confus[1][1]
        tn = confus[0][0] + confus[0][2] + confus[2][0] + confus[2][2]
        fp = confus[1][0] + confus[1][2]
        fn = confus[0][1] + confus[2][1]

        precision = (tp/(tp + fp))*100
        recall =  (tp / (tp + fn))*100   #sensitivity
        F1_score = ((2*tp) / (2*tp + fp + fn))*100

        score = accuracy
        losses.append(loss)
        
        score = score * 100.0
        scores_in_fold.append(score)
        
        print('Loss: %.3f%% Accuracy: %.3f%% ' % (loss, score))
        print('precision: %.3f%% recall: %.3f%% F1 score: %.3f%%' % (precision, recall, F1_score))

    return model, losses, scores_in_fold

In [82]:
def predictData(model, testX, testy, parameters):

    testX = to_3Darray(testX)
    testX = testX.reshape((testX.shape[0], parameters[2], parameters[3], testX.shape[2]))

    testy = to_categorical(testy)
    y_pred = (model.predict(testX) > 0.5).astype("int32")
    testy = np.argmax(testy, axis=1)
    y_pred = np.argmax(y_pred, axis=1)

    confus = confusion_matrix(testy, y_pred, labels=None, sample_weight=None)

    return confus, y_pred

In [83]:
def performance(state:int, confus:list):
    performList = []
    if state == 1:
        tp = confus[1][1]
        tn = confus[0][0] + confus[0][2] + confus[2][0] + confus[2][2]
        fp = confus[0][1] + confus[2][1]
        fn = confus[1][0] + confus[1][2]
    elif state == 2:
        tp = confus[2][2]
        tn = confus[0][0] + confus[0][1] + confus[1][0] + confus[1][1]
        fp = confus[0][2] + confus[1][2]
        fn = confus[2][0] + confus[2][1]

    precision = (tp/(tp + fp))*100
    sensitivity = (tp / (tp + fn))*100  # sensitivity
    specificity = (tn/(tn + fp))*100
    F1_score = ((2*tp) / (2*tp + fp + fn))*100

    performList.append(precision)
    performList.append(sensitivity)
    performList.append(specificity)
    performList.append(F1_score)

    return performList

In [84]:
parameters = [50, 64, 2, 32] # epoch, batch size, n_steps, n_length
date, currTime = str(datetime.date.today()), str(time.strftime("%H-%M", time.localtime()))

resultPath = os.getcwd() +"/result/" + date + '/' 
if not os.path.exists(resultPath):
    os.mkdir(resultPath)
file = open(resultPath + currTime + ".txt", "a+")

file.write("Epoch=" + str(parameters[0]) + " Batch Size=" + str(parameters[1]) + " steps=" + str(parameters[2]) + " length=" + str(parameters[3]) + '\n')
file.close()

for testSub in testSubs:
    makeDataset(fromPath, savePath, testSub)

    saveDFtocsv(savePath, savePath, "not_windowed_train", False, "total_train")
    saveDFtocsv(savePath, savePath, "not_windowed_test", False, "total_test")

    trainTime, trainData, trainAction = getTimeAndDF(train_data_path)
    testTime, testData, testAction = getTimeAndDF(test_data_path)
    newTime, newData, newAction = getTimeAndDF(new_data_path)

    trainData = (trainData-trainData.mean())/(trainData.std())
    testData = (testData-testData.mean())/(testData.std())
    newData = (newData-newData.mean())/(newData.std())

    trainWindows, trainStop = getTotalWindows(trainTime, winLen, 20) 
    newWindows, newStop = getTotalWindows(newTime, winLen, 0.02)
    testWindows, testStop = getTotalWindows(testTime, winLen, 20)

    del trainTime, newTime, testTime

    trainX, trainy = XySplit(trainData, trainWindows, 3, trainStop, trainAction)
    newX, newy = XySplit(newData, newWindows, 3, newStop, newAction)
    testX, testy = XySplit(testData, testWindows, 3, testStop, testAction)

    X = np.concatenate((trainX, newX))
    y = np.concatenate((trainy, newy))

    del trainX, trainy, newX, newy, trainData, trainWindows, newData, newWindows, testWindows, trainStop, trainAction, newStop, newAction, testStop, testAction

    unique, counts = np.unique(y, return_counts=True)
    class_weight = {0:(1/counts[0])*len(y)/2, 1:(1/counts[1])*len(y)/2, 2:(1/counts[2])*len(y)/2}

    skf = StratifiedKFold(n_splits=10, shuffle = True, random_state=42)
    skf.get_n_splits(X, y)

    scores = []

    model, losses, scores_in_fold = trainingModel(skf, X, y, parameters, class_weight)

    file = open(resultPath + currTime + ".txt", "a+")
    file.write(testSub + "\n" + "\n")

    m, s = np.mean(scores_in_fold), np.std(scores_in_fold)
    temp = 'Training Accuracy: ' + str(round(m, 3)) +' (+/-' + str(round(s, 3)) +')'
    file.write(temp + "\n")

    m, s = np.mean(losses), np.std(losses)
    temp = 'Training Loss: ' + str(round(m, 3)) +' (+/-' + str(round(s, 3)) +')'
    file.write(temp + "\n" + "\n")

    confus, y_pred = predictData(model, testX, testy, parameters)

    accuracy = accuracy_score(testy, y_pred)*100
    perform = performance(1, confus)

    temp = 'Test Accuracy: ' + str(round(accuracy, 3))
    file.write(temp + "\n")
    temp = 'FOG: \nspecificity: ' + str(round(perform[1], 3)) + ' sensitivity: ' + str(round(perform[2], 3)) + ' F1 score: ' + str(str(round(perform[3], 3)))
    file.write(temp + "\n")

    perform = performance(2, confus)
    temp = 'PreFOG: \nspecificity: ' + str(round(perform[1], 3)) + ' sensitivity: ' + str(round(perform[2], 3)) + ' F1 score: ' + str(str(round(perform[3], 3)))
    file.write(temp + "\n")
    file.close()

==> Fold #0
Loss: 0.282% Accuracy: 89.746% 
precision: 85.538% recall: 87.018% F1 score: 86.271%
==> Fold #1
Loss: 0.256% Accuracy: 90.683% 
precision: 88.476% recall: 85.119% F1 score: 86.765%
==> Fold #2
Loss: 0.268% Accuracy: 90.979% 
precision: 87.007% recall: 88.097% F1 score: 87.549%
==> Fold #3
Loss: 0.273% Accuracy: 90.436% 
precision: 90.023% recall: 85.337% F1 score: 87.618%
==> Fold #4
Loss: 0.252% Accuracy: 91.866% 
precision: 90.333% recall: 87.556% F1 score: 88.923%
==> Fold #5
Loss: 0.252% Accuracy: 91.642% 
precision: 86.775% recall: 90.120% F1 score: 88.416%
==> Fold #6
Loss: 0.271% Accuracy: 90.409% 
precision: 87.626% recall: 86.225% F1 score: 86.920%
==> Fold #7
Loss: 0.278% Accuracy: 89.127% 
precision: 83.063% recall: 87.745% F1 score: 85.340%
==> Fold #8
Loss: 0.314% Accuracy: 84.320% 
precision: 90.797% recall: 82.910% F1 score: 86.674%
==> Fold #9
Loss: 0.270% Accuracy: 90.557% 
precision: 89.172% recall: 85.789% F1 score: 87.448%
==> Fold #0
Loss: 0.264% Accur