In [52]:
import numpy as np
import pandas as pd
from google.colab import drive
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch
from torch.utils.data import Dataset, DataLoader
import pickle
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,roc_auc_score,ConfusionMatrixDisplay,precision_score,recall_score


In [None]:
import numpy as np
from scipy import signal

def preprocess(data):
  # Delete silent part
  data = data.astype(float)
  n_size = 50
  n_len = int(data.shape[0]/n_size)
  std_data = np.zeros((n_size, 2))
  for i in range(n_size):
    seg_data_x = data[i*n_len:i*n_len+n_len, 0]
    seg_data_y = data[i*n_len:i*n_len+n_len, 1]
    std_data[i, 0] = np.std(seg_data_x)
    std_data[i, 1] = np.std(seg_data_y)
  pass_threshold = 1
  pass_idx_x = np.where(std_data[:,0] >= pass_threshold)[0]
  pass_idx_y = np.where(std_data[:,1] >= pass_threshold)[0]
  if len(pass_idx_x) == 0:
    start_idx = max(0, pass_idx_y[0] - 1)
    end_idx = min(data.shape[0],pass_idx_y[-1] + 1)
  elif len(pass_idx_y) == 0:
    start_idx = max(0, pass_idx_x[0] - 1)
    end_idx = min(data.shape[0],pass_idx_x[-1] + 1)
  else:
    start_idx = max(0,min(pass_idx_x[0], pass_idx_y[0]) - 1)
    end_idx = min(data.shape[0],max(pass_idx_x[-1], pass_idx_y[-1]) + 1)
  
  # resample to 100 data points
  data = signal.resample(data[start_idx*n_len:end_idx*n_len, :], 100, axis=0)
  # scale
  data = (data - data.min(axis=0, keepdims=True))/(data.max(axis=0, keepdims=True) - data.min(axis=0, keepdims=True))
  return data


In [None]:

drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
main_dir = '/content/drive/MyDrive/S1Database/isolated/'
os.listdir(main_dir)

['ReadMe', '004', '002', '006', '003', '005', '001']

In [None]:
# get file list for 1 patient
def get_file_list(path):
    file_list = []
    label = []
    for i in os.listdir(path):
        file_list.append(i)
        l = int(i.split("_")[-1].split(".")[0])-1
        label.append(l)
    return label,file_list

In [None]:
# get feature from 1 file and preprocess
def get_feature(path):
    f = []
    a = pd.read_csv(path,names=["vertical","horizontal"])
    a = np.array(a)
    #print(a.shape)
    a = preprocess(a)
    #print(a.shape)
    f.append(a)        

    return f
    
    

In [None]:
# self identified test_split
def my_train_test_split_user_dependent(path,test_split,file_list,label):
    X_test = []
    X_train = []
    y_train = []
    y_test = []
    for f in range(len(file_list)):
        file = file_list[f]
        file_label = label[f]
        feature = get_feature(str(path+file))
        #print(file.split('_')[2],file_label)
        if file.split('_')[2] in test_split:
            X_test.append(feature)
            y_test.append(file_label)
            #print(file,len(feature))
        else:
            X_train.append(feature)
            y_train.append(file_label)
            #print(file)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    return X_train,X_test, y_train,y_test

In [None]:
def evaluate(y_true, y_pred,y_prob):
    f1_micro = f1_score(y_true, y_pred,average = 'micro')
    f1_macro = f1_score(y_true, y_pred,average = 'macro')
    precision_micro = precision_score(y_true, y_pred, average='micro')
    precision_macro = precision_score(y_true, y_pred, average='macro')
    recall_micro = recall_score(y_true, y_pred, average='micro')
    recall_macro = recall_score(y_true, y_pred, average='macro')
    acc = accuracy_score(y_true, y_pred)

    return f1_micro,f1_macro,precision_micro,precision_macro,recall_micro,recall_macro,acc

In [None]:

class classficiation(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, bidirectional=False):
        super(classficiation, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size * (2 if bidirectional else 1), num_classes)

    def forward(self, x):
        # Initialize hidden and cell states
        h0 = torch.zeros(self.num_layers * (2 if self.lstm.bidirectional else 1), x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers * (2 if self.lstm.bidirectional else 1), x.size(0), self.hidden_size).to(x.device)
        
        # LSTM layer
        out, _ = self.lstm(x, (h0, c0))
        
        # Fully connected layer
        out = self.fc(out[:, -1, :])
        
        return out




In [62]:
# Define custom dataset class
class MYDataset(Dataset):
    def __init__(self, feature,label):
      self.feature = []
      self.label = []
      for i in range(len(feature)):
        self.feature.append(feature[i])
        self.label.append(label[i])

    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        f = self.feature[idx]
        #f = f[np.newaxis,:,:]
        #f = f.reshape(100, 2)
        f = f.squeeze()
        l = self.label[idx]
        return f, l


# User Dependent

In [None]:
patient = ["001","002","003","004","005","006"]
test_split = [["01","02"],["03","04"],["05","06"],["07","08"],["09","10"]]

In [64]:
#### def training
def train(model,train_dataloader,num_epochs,optimizer,criterion):
  train_loss = []
  model = model.to(device)
  for epoch in range(num_epochs):
        #print("running epoch: ", epoch)
        model.train()
        Loss = 0
        train_label = []
        train_ypred = []
        for i, (feature,label) in enumerate(train_dataloader, 0):
          #print(feature.shape)
          optimizer.zero_grad()
          feature = feature.to(device)
          # print(feature.shape)
          output = model(feature.float())
          ypred = torch.argmax(output.cpu(),dim=1)
          #print(feature.shape,output.shape)
          #print(ypred)
          for j in ypred:
            train_ypred.append(j)
          for j in label:
            train_label.append(j)
          loss = criterion(output,label.to(device))
          loss.backward()
          optimizer.step()
          Loss += loss.item()
        Loss = Loss/(i+1)
        acc = accuracy_score(y_true=train_label, y_pred= train_ypred)
        train_loss.append(Loss)
  #model.eval()

        #print("train loss: ",Loss," train accuracy: ",acc)
  return model

In [None]:
def eval(model,test_dataloader,m):
  with torch.no_grad():
    model.eval()
    label_list = []
    ypred_list = []
    yprob_list = []
        #Loss = 0
    for i, (feature,label) in enumerate(test_dataloader, 0):
      feature = feature.to(device)
      output = model(feature.float())
      #print(output.shape)
      ypred = torch.argmax(output,dim=1)
      yprob = m(output)
      for j in ypred.cpu().detach().numpy():
        ypred_list.append(j)
      for l in label.cpu().detach().numpy():
        label_list.append(l)
      for l in yprob.cpu().detach().numpy():
        yprob_list.append(l)
    f1_micro,f1_macro,precision_micro,precision_macro,recall_micro,recall_macro,acc = evaluate(label_list, ypred_list,yprob_list)

    #print("test acc: ", acc)
    return f1_micro,f1_macro,precision_micro,precision_macro,recall_micro,recall_macro,acc

In [57]:
print(model)

classficiation(
  (lstm): LSTM(2, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=12, bias=True)
)


## train 5-fold cross validation

In [66]:
from tqdm.auto import tqdm

device = 'cuda' if torch.cuda.is_available() else 'cpu'
m = nn.Softmax(dim=1).to(device)
total_acc = []
total_f1_micro = []
total_f1_macro = []
total_recall_micro = []
total_recall_macro = []
total_precision_micro = []
total_precision_macro = []
input_size = 2  # Set the input size according to your specific problem
hidden_size = 64
num_layers = 2
num_classes = 12  # Set the number of classes according to your specific problem
bidirectional = False
for p in tqdm(patient):
    #print(p)
    output_pck = []
    path = str(main_dir+p+"/isolated_strokes/")
    acc = 0 
    f1_macro =0
    f1_micro =0
    recall_micro =0
    recall_macro =0
    precision_micro =0
    precision_macro =0
    for t in tqdm(test_split):
        label,file_list = get_file_list(path)
        X_train,X_test, y_train,y_test = my_train_test_split_user_dependent(path,t,file_list,label)
        #print(X_train.shape,X_test.shape, y_train.shape,y_test.shape)
        train_data = MYDataset(X_train,y_train)
        train_dataloader  = DataLoader(train_data, batch_size=16, shuffle=True)
        test_data = MYDataset(X_test,y_test)
        test_dataloader  = DataLoader(test_data, batch_size=16, shuffle=True)
        #self, input_size, hidden_size, num_layers, num_classes, bidirectional=False
        # set new model
        torch.manual_seed(42)
        
        model = classficiation(input_size, hidden_size, num_layers, num_classes, bidirectional)

        lr = 0.0001
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
        criterion = nn.CrossEntropyLoss()
        num_epochs = 50
        
        # train 
        model = train(model,train_dataloader,num_epochs,optimizer,criterion)
        #AssertionError: LSTM: Expected input to be 2-D or 3-D but received 4-D tensor
        # evaluate 
        f1_mi,f1_ma,precision_mi,precision_ma,recall_mi,recall_ma,accuracy = eval(model,test_dataloader,m)
        acc += accuracy
        f1_macro += f1_ma
        f1_micro += f1_mi
        recall_micro += recall_mi
        recall_macro += recall_ma
        precision_micro += precision_mi
        precision_macro += precision_ma
    
    print(p,acc/5,f1_macro/5,f1_micro/5,recall_micro/5,recall_macro/5,precision_micro/5,precision_macro/5)
    total_acc.append(acc/5)
    total_f1_micro.append(f1_micro/5)
    total_f1_macro.append(f1_macro/5)
    total_recall_micro.append(recall_micro/5)
    total_recall_macro.append(recall_macro/5)
    total_precision_micro.append(precision_micro/5)
    total_precision_macro.append(precision_macro/5)



  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


001 0.08333333333333333 0.012820512820512822 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.006944444444444445


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/5 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


002 0.08333333333333333 0.012820512820512822 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.006944444444444445


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/5 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


003 0.07499999999999998 0.012950244200244201 0.07499999999999998 0.07499999999999998 0.07499999999999998 0.07499999999999998 0.007111528822055138


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/5 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


004 0.08333333333333333 0.013034188034188035 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.00707070707070707


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/5 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


005 0.07536231884057971 0.012637362637362638 0.07536231884057971 0.07536231884057971 0.075 0.07536231884057971 0.006944444444444445


  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/5 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


006 0.08333333333333333 0.022762885262885262 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.08333333333333333 0.013624338624338622


  _warn_prf(average, modifier, msg_start, len(result))


In [67]:
print("Total acc",np.mean(np.array(total_acc)))
print("f1_macro",np.mean(np.array(total_f1_macro)))
print("f1_micro",np.mean(np.array(total_f1_micro)))
print("total_recall_micro",np.mean(np.array(total_recall_micro)))
print("total_recall_macro",np.mean(np.array(total_recall_macro)))
print("total_precision_micro",np.mean(np.array(total_precision_micro)))
print("total_precision_macro",np.mean(np.array(total_precision_macro)))

Total acc 0.08061594202898549
f1_macro 0.014504284295950964
f1_micro 0.08061594202898549
total_recall_micro 0.08061594202898549
total_recall_macro 0.08055555555555555
total_precision_micro 0.08061594202898549
total_precision_macro 0.008106651308405695


In [71]:
#########training
for p in patient:
    print(p)
    path = str(main_dir+p+"/isolated_strokes/")
    label,file_list = get_file_list(path)
    X = []
    y = []
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    for f in range(len(file_list)):
        file = file_list[f]
        file_label = label[f]
        feature = get_feature(str(path+file))
        #print(file.split('_')[2],file_label)
        X.append(feature)
        y.append(file_label)
    X = np.array(X)
    y = np.array(y)
    train_data = MYDataset(X,y)
    train_dataloader  = DataLoader(train_data, batch_size=16, shuffle=True)
        # set new model
    torch.manual_seed(42)
    
    lr = 0.0001
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    num_epochs = 50


001
002
003
004
005
006


# Split on patient level - User-independent

In [72]:
# self identified test_split
def my_train_test_split_user_independent(test_patient,train_patient):
    X_test = []
    X_train = []
    y_train = []
    y_test = []
    for p in train_patient:
        path = str(main_dir+p+"/isolated_strokes/")
        label,file_list = get_file_list(path)
        for i in range(len(file_list)):
            file = file_list[i]
            file_label = label[i]
            feature = get_feature(str(path+file))
            X_train.append(feature)
            y_train.append(file_label)
        
    path = str(main_dir+test_patient+"/isolated_strokes/")
    label,file_list = get_file_list(path)
    for i in range(len(file_list)):
        file = file_list[i]
        file_label = label[i]
        feature = get_feature(str(path+file))
        X_test.append(feature)
        y_test.append(file_label)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    return X_train,X_test, y_train,y_test

In [74]:
patient = ["001","002","003","004","005","006"]
acc = 0 
f1_macro =0
f1_micro =0
recall_micro =0
recall_macro =0
precision_micro =0
precision_macro =0
for test_patient in patient:
    train_patient= ["001","002","003","004","005","006"]
    train_patient.remove(test_patient)
    X_train,X_test, y_train,y_test = my_train_test_split_user_independent(test_patient,train_patient)
    train_data = MYDataset(X_train,y_train)
    train_dataloader  = DataLoader(train_data, batch_size=16, shuffle=True)
    test_data = MYDataset(X_test,y_test)
    test_dataloader  = DataLoader(test_data, batch_size=16, shuffle=True)
        
    # set new model
    torch.manual_seed(42)
  
    lr = 0.0001
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    num_epochs = 50
        # train 
    model = train(model,train_dataloader,num_epochs,optimizer,criterion)
        # evaluate 
    # evaluate 
    f1_mi,f1_ma,precision_mi,precision_ma,recall_mi,recall_ma,accuracy = eval(model,test_dataloader,m)
    acc += accuracy
    f1_macro += f1_ma
    f1_micro += f1_mi
    recall_micro += recall_mi
    recall_macro += recall_ma
    precision_micro += precision_mi
    precision_macro += precision_ma
    
print(acc/6,f1_macro/6,f1_micro/6,recall_micro/6,recall_macro/6,precision_micro/6,precision_macro/6)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0.07592015535383644 0.019116364820843997 0.07592015535383644 0.07592015535383644 0.075 0.07592015535383644 0.012855702705310288


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#########training
X = []
y = []
for p in patient:
    print(p)
    path = str(main_dir+p+"/isolated_strokes/")
    label,file_list = get_file_list(path)
    for f in range(len(file_list)):
        file = file_list[f]
        file_label = label[f]
        feature = get_feature(str(path+file))
        #print(file.split('_')[2],file_label)
        X.append(feature)
        y.append(file_label)
X = np.array(X)
y = np.array(y)
train_data = MYDataset(X,y)
train_dataloader  = DataLoader(train_data, batch_size=16, shuffle=True)
        # set new model
torch.manual_seed(42)
model = classficiation()
lr = 0.0001
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
criterion = nn.CrossEntropyLoss()
num_epochs = 50
        # train 



001
002
003
004
005
006
