In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
import tensorflow as tf
from tqdm import tqdm

import math
import torch.nn.functional as F
from sklearn.metrics import f1_score, mean_squared_error
from fastprogress import master_bar, progress_bar

%matplotlib inline

In [0]:
# Training hyper-parameters used by the data loader, optimizer and scheduler cells below.
BATCH_SIZE = 16           # Batch size passed to the training DataLoader
LEARNING_RATE = 0.001     # Initial learning rate passed to Adam
LEARNING_RATE_SCHEDULE_FACTOR = 0.1           # `factor` passed to ReduceLROnPlateau (learning-rate multiplier on a plateau)
LEARNING_RATE_SCHEDULE_PATIENCE = 5           # `patience` passed to ReduceLROnPlateau (epochs to wait before reducing)
MAX_EPOCHS = 30                               # Maximum number of training epochs

# Load the Train/Validation File Lists

In [0]:
INDEX_FEATURE = 2 # Index into FEATURES of the label column to train on (2 -> 'tremor')
FEATURES = ['dyskinesia', 'on_off', 'tremor']  # Label column names; also used to build the split-list file names

In [0]:
# Folder holding the per-feature train/validation list files (*.lst) that are read below.
PATH_FOLDER = '/content/drive/My Drive/Beat-PD/List_train_test'

def read_lst(file_name):
    """Read a list file and return its entries, one per non-empty line.

    Args:
        file_name: Path of a text file holding one entry per line.

    Returns:
        list[str]: The entries in file order with the trailing newline removed.
        Blank lines (e.g. a trailing empty line) are skipped, because an empty
        entry would later be turned into the non-existent file name '.csv'.
    """
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open(file_name) as f:
        stripped = (line.rstrip('\n') for line in f)
        return [entry for entry in stripped if entry]

In [0]:
# For every feature, load the entries of its training and validation list files.
train_list, val_list = {}, {}

for fe in FEATURES:
    train_lst_file = os.path.join(PATH_FOLDER, f'train_train_nonan_{fe}.lst')
    val_lst_file = os.path.join(PATH_FOLDER, f'train_val_nonan_{fe}.lst')
    train_list[fe] = read_lst(train_lst_file)
    val_list[fe] = read_lst(val_lst_file)

# Data Preprocessing

In [6]:
# Labels CSV: one row per measurement with its id, the subject id and the three symptom columns.
LABEL_PATH = '/content/drive/My Drive/Beat-PD/Train_Dataset/Data_Train_CIS-PD/Data_Train_CIS-PD/cis-pd.data_labels/data_labels/CIS-PD_Training_Data_IDs_Labels.csv'
LABEL_DF = pd.read_csv(LABEL_PATH)
# NOTE(review): dropna is left disabled, so rows with missing labels (if any) stay in LABEL_DF.
# LABEL_DF.dropna(axis=0, inplace=True)
LABEL_DF.head()

Unnamed: 0,measurement_id,subject_id,on_off,dyskinesia,tremor
0,cc7b822c-e310-46f0-a8ea-98c95fdb67a1,1004,1.0,1.0,1.0
1,5163afe8-a6b0-4ea4-b2ba-9b4501dd5912,1004,0.0,0.0,0.0
2,5cf68c8e-0b7a-4b73-ad4f-015c7a20fb5a,1004,1.0,1.0,1.0
3,fb188ae2-2173-4137-9236-19a137a402c2,1004,3.0,3.0,3.0
4,19a3e9ea-fce1-40b7-9457-2618970beb7b,1004,1.0,1.0,1.0


In [0]:
# Folder with the recording CSVs; files are opened as <measurement_id>.csv.
TRAIN_PATH = '/content/drive/My Drive/Beat-PD/Train_Dataset/Data_Train_CIS-PD/Data_Train_CIS-PD/cis-pd.training_data/training_data/' 
# Same labels CSV that was loaded into LABEL_DF; passed to the dataset class below.
LABEL_PATH = '/content/drive/My Drive/Beat-PD/Train_Dataset/Data_Train_CIS-PD/Data_Train_CIS-PD/cis-pd.data_labels/data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

def preprocessing(name, train_path):
    """Load one recording and its label row(s) as tensors.

    Args:
        name: Measurement id; the file `<name>.csv` is read from `train_path`.
        train_path: Folder containing the recording CSV files.

    Returns:
        tuple: `(data, label)`. `data` is a float tensor holding the CSV values
        without the `Timestamp` column, with two singleton dimensions prepended.
        `label` is a long tensor with the `FEATURES` columns of the `LABEL_DF`
        rows whose `measurement_id` equals `name`.
    """
    # Sensor samples: every column of the CSV except the timestamp.
    recording = pd.read_csv(os.path.join(train_path, name + '.csv'))
    samples = recording.drop(columns='Timestamp').values
    data = torch.tensor(samples).float().unsqueeze(0).unsqueeze(0)

    # Label values of the matching measurement.
    row_mask = LABEL_DF['measurement_id'] == name
    label_values = LABEL_DF.loc[row_mask, FEATURES].values
    label = torch.tensor(label_values).long()
    return data, label

# Create the Dataset and DataLoaders

In [0]:
class fundus_dataset(Dataset):
    """Dataset that yields one recording CSV and its scalar label per item.

    Args:
        path_folder_csv: Folder containing the recording files `<name>.csv`.
        file_names: Sequence of measurement ids (file names without `.csv`).
        path_label_csv: CSV with a `measurement_id` column and the label columns.
        label_column: Name of the label column to return. When left as `None`
            the notebook-level selection `FEATURES[INDEX_FEATURE]` is looked up
            on every access, which is the original behaviour.
    """

    def __init__(self, path_folder_csv, file_names, path_label_csv, label_column=None):
        super().__init__()

        self.folder = path_folder_csv
        self.file_names = file_names
        self.label_df = pd.read_csv(path_label_csv)
        self.label_column = label_column

    def __len__(self):
        """Return the number of recordings in this split."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Return `(data, label)` for the recording at `index`.

        Returns:
            tuple: `data` is a float32 tensor of shape (1, 1, rows, columns)
            holding the CSV values without the `Timestamp` column; `label` is
            a 0-dim float32 tensor.

        Raises:
            KeyError: If the labels CSV has no row for the measurement id.
        """
        name = self.file_names[index]

        # Sensor samples: every column except the timestamp.
        df = pd.read_csv(os.path.join(self.folder, name + '.csv'))
        samples = df.drop(columns='Timestamp').values
        data = torch.tensor(samples).float().unsqueeze(0).unsqueeze(0)

        column = self.label_column
        if column is None:
            column = FEATURES[INDEX_FEATURE]
        matches = self.label_df.loc[self.label_df['measurement_id'] == name, column]
        if matches.empty:
            # A bare IndexError here would be mistaken for "end of sequence" by
            # plain `for` iteration over the dataset, so raise a distinct error.
            raise KeyError(f"no label row for measurement_id {name!r}")

        # float32 matches the dtype of the model output, so the loss does not
        # have to mix float and double tensors.
        label = torch.tensor(matches.values[0], dtype=torch.float32)
        return data, label

In [0]:
def custom_collate(batch):
    """Collate samples while leaving the data items unstacked.

    Args:
        batch: Sequence of samples; element 0 of each sample is its data and
            element 1 its label.

    Returns:
        tuple: `(data, target)` where `data` is a plain list of the per-sample
        data items and `target` is a tensor built from the labels.
    """
    data, label = [], []
    for sample in batch:
        data.append(sample[0])
        label.append(sample[1])
    return data, torch.tensor(label)

In [0]:
# Training split of the selected feature. custom_collate keeps the recordings of a
# batch as a list instead of stacking them into one tensor.
train_dataset = fundus_dataset(TRAIN_PATH, train_list[FEATURES[INDEX_FEATURE]], LABEL_PATH)
train_dataloader = DataLoader(dataset=train_dataset, 
                              batch_size=BATCH_SIZE, 
                              shuffle=True, 
                              num_workers=8, 
                              pin_memory=True, 
                              collate_fn=custom_collate)

In [0]:
# Validation split of the selected feature, served one recording per batch with the
# default collate function.
# NOTE(review): shuffle=True is not needed to compute a mean validation loss —
# consider shuffle=False.
val_dataset = fundus_dataset(TRAIN_PATH, val_list[FEATURES[INDEX_FEATURE]], LABEL_PATH)
val_dataloader = DataLoader(dataset=val_dataset, 
                            batch_size=1, 
                            shuffle=True, 
                            num_workers=8,  
                            pin_memory=True)

# Design Model

In [0]:
class Model_1(nn.Module):
    """Two convolutions, a single-unit LSTM and a linear output layer.

    Args:
        num_classes: Number of values produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super(Model_1, self).__init__()

        self.out_conv1 = 64
        self.out_conv2 = 128
        self.num_classes = num_classes

        # Feature extractor: two convolutions, each followed by a ReLU.
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=self.out_conv1,
                      kernel_size=(10, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(in_channels=self.out_conv1, out_channels=self.out_conv2,
                      kernel_size=(5, 3), stride=(5, 3)),
            nn.ReLU(),
        )

        # Recurrent stage applied to the convolution output.
        self.lstm = nn.Sequential(
            nn.LSTM(input_size=1, hidden_size=1, num_layers=1),
        )

        # Output head on top of the last LSTM step.
        self.fc = nn.Sequential(
            nn.Linear(self.out_conv2, num_classes),
        )

    def forward(self, input):
        """Return the `num_classes` outputs for one input tensor.

        The final `view(self.out_conv2)` only succeeds when the last LSTM step
        holds exactly `out_conv2` values.
        NOTE(review): that presumably means one 3-column recording per call —
        confirm against the callers.
        """
        conv_features = self.cnn(input)
        # Reshaped to (-1, out_conv2, 1) before the LSTM.
        lstm_input = conv_features.transpose(1, 2).view(-1, self.out_conv2, 1)
        lstm_output, _ = self.lstm(lstm_input)
        last_step = lstm_output[-1].view(self.out_conv2)
        return self.fc(last_step)


In [13]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [14]:
# num_classes=1 gives a single output value, which is trained against the label
# with nn.MSELoss further down.
model = Model_1(num_classes = 1).to(device)
model

Model_1(
  (cnn): Sequential(
    (0): Conv2d(1, 64, kernel_size=(10, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 128, kernel_size=(5, 3), stride=(5, 3))
    (3): ReLU()
  )
  (lstm): Sequential(
    (0): LSTM(1, 1)
  )
  (fc): Sequential(
    (0): Linear(in_features=128, out_features=1, bias=True)
  )
)

In [0]:
# x, y = train_dataset[0]
# model(x.to(device))

# Loss and Optimizer

In [0]:
# Mean squared error between the model output and the label.
loss = nn.MSELoss()

# Adam with betas/eps written out explicitly, plus a small weight decay.
optimizer = optim.Adam(model.parameters(), 
                   lr=LEARNING_RATE, 
                   betas=(0.9, 0.999), 
                   eps=1e-8, 
                   weight_decay=1e-5)

# Reduce the learning rate by LEARNING_RATE_SCHEDULE_FACTOR once the value passed to
# lr_scheduler.step() has stopped decreasing (mode='min') for more than
# LEARNING_RATE_SCHEDULE_PATIENCE epochs.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm against
# the installed version.
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                    factor = LEARNING_RATE_SCHEDULE_FACTOR, 
                                                    patience = LEARNING_RATE_SCHEDULE_PATIENCE, 
                                                    mode = 'min',
                                                    verbose=True)

# Training each epoch

In [0]:
def epoch_training(epoch, model, train_dataset, device, loss_criteria, optimizer):
    """Run one optimisation pass over the training batches.

    Args:
        epoch: Index of the current epoch (not used by the computation).
        model: Network to train; it is called on one sample at a time.
        train_dataset: Iterable of `(xx, yy)` batches, where `xx` is a sequence
            of per-sample input tensors and `yy` a tensor with one target per
            sample (the format produced with `custom_collate`).
        device: Device the inputs and targets are moved to.
        loss_criteria: Loss called as `loss_criteria(predictions, targets)`.
        optimizer: Optimizer stepping the parameters of `model`.

    Returns:
        float: Mean of the per-batch losses.

    Raises:
        ValueError: If `train_dataset` yields no batches.
    """
    model.train()
    training_loss = 0.0
    num_batches = 0

    for xx, yy in train_dataset:
        optimizer.zero_grad()

        # The model is applied sample by sample; the predictions are joined once
        # per batch instead of growing a tensor with repeated torch.cat calls.
        yhats = torch.cat([model(x.to(device)) for x in xx], 0)
        # Align the targets with the predictions so the loss never mixes dtypes
        # (e.g. float32 outputs against float64 or integer labels).
        yy = yy.to(device=device, dtype=yhats.dtype)

        loss = loss_criteria(yhats, yy)
        loss.backward()
        optimizer.step()
        training_loss += loss.item()
        num_batches += 1

    if torch.cuda.is_available(): torch.cuda.empty_cache()

    if num_batches == 0:
        raise ValueError("epoch_training received no batches")
    # Counting batches (rather than calling len()) also supports iterables
    # without a length and gives the same divisor for a DataLoader.
    return training_loss / num_batches

# Evaluate Model

In [0]:
def evaluating(epoch, model, val_dataset, device, loss_criteria):
    """Compute the mean loss of `model` over the validation batches.

    Args:
        epoch: Index of the current epoch (not used by the computation).
        model: Network to evaluate; it is switched to eval mode.
        val_dataset: Iterable of `(xx, yy)` batches. The leading dimension of
            `xx` is squeezed away before the model is called.
        device: Device the inputs and targets are moved to.
        loss_criteria: Loss called as `loss_criteria(predictions, targets)`.

    Returns:
        torch.Tensor: 0-dim tensor with the mean per-batch loss. A tensor (not
        a float) is returned so existing callers can keep using `.item()`.

    Raises:
        ValueError: If `val_dataset` yields no batches.
    """
    model.eval()
    val_loss = 0
    num_batches = 0

    with torch.no_grad():
        for xx, yy in val_dataset:
            x = xx.to(device).squeeze(0)
            yhats = model(x)
            # Align the targets with the predictions so the loss never mixes
            # dtypes (e.g. float32 outputs against float64 or integer labels).
            yy = yy.to(device=device, dtype=yhats.dtype)

            val_loss += loss_criteria(yhats, yy)
            num_batches += 1

    if torch.cuda.is_available(): torch.cuda.empty_cache()

    if num_batches == 0:
        raise ValueError("evaluating received no batches")
    # Counting batches gives the same divisor as len() for a DataLoader and
    # also works for iterables without a length.
    return val_loss / num_batches

# Full Training Run

In [0]:
# File name of the saved weights; appended to the checkpoint folder to form model_path.
name_model = 'model_3_tremor.pth'

In [0]:
# Lowest validation MSE seen so far. Starting at +inf means any finite first-epoch
# loss counts as an improvement; a hard-coded start value such as 10 would never
# save a checkpoint if the loss stayed above it.
best_mse = float('inf')
model_path = '/content/drive/My Drive/Beat-PD/CNN-LSTM-FC/pretrained_model/' + name_model
# torch.save does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# One record per finished epoch. info_df is rebuilt from the records after every
# epoch so the history is still available for plotting if the run is interrupted.
history = []
info_df = pd.DataFrame(columns=["epoch", "train_loss", "val_loss"])

for epoch in tqdm(range(MAX_EPOCHS)):
    train_loss = epoch_training(epoch, model, train_dataloader, device, loss, optimizer)
    # Convert to a plain float right away, so neither the history nor best_mse
    # keeps a tensor alive on the training device.
    val_loss = float(evaluating(epoch, model, val_dataloader, device, loss))

    print(f"\nEpoch {epoch}: training losses= {train_loss}; val_loss= {val_loss};")
    # DataFrame.append was removed in pandas 2.0; build the frame from the records.
    history.append({"epoch": epoch, "train_loss": train_loss, "val_loss": val_loss})
    info_df = pd.DataFrame(history)

    lr_scheduler.step(val_loss)

    # Keep only the weights of the best epoch according to the validation loss.
    if best_mse > val_loss:
        print(f"In epoch {epoch} - Improve MSE from {best_mse} to {val_loss}")
        best_mse = val_loss
        torch.save(model.state_dict(), model_path)

  0%|          | 0/30 [00:00<?, ?it/s]


Epoch 0: training losses= 0.8703176793642342; val_loss= 0.8147740364074707;
In epoch 0 - Improve MSE from 10 to 0.8147740364074707


  7%|▋         | 2/30 [17:23<4:10:53, 537.62s/it]


Epoch 1: training losses= 0.7517907903529704; val_loss= 0.8159471154212952;


 10%|█         | 3/30 [25:23<3:54:09, 520.35s/it]


Epoch 2: training losses= 0.7554933126084507; val_loss= 0.8162761330604553;


 13%|█▎        | 4/30 [33:29<3:40:55, 509.84s/it]


Epoch 3: training losses= 0.753062330186367; val_loss= 0.8235102295875549;


 17%|█▋        | 5/30 [41:31<3:29:01, 501.67s/it]


Epoch 4: training losses= 0.7440880043432117; val_loss= 0.8178508877754211;


 20%|██        | 6/30 [49:36<3:18:40, 496.67s/it]


Epoch 5: training losses= 0.7457822910510004; val_loss= 0.8171606659889221;


 23%|██▎       | 7/30 [57:37<3:08:37, 492.08s/it]


Epoch 6: training losses= 0.7491705087013543; val_loss= 0.8221998810768127;
Epoch     7: reducing learning rate of group 0 to 1.0000e-04.


 27%|██▋       | 8/30 [1:05:44<2:59:52, 490.55s/it]


Epoch 7: training losses= 0.7320969915017486; val_loss= 0.8229222893714905;


 30%|███       | 9/30 [1:13:44<2:50:31, 487.24s/it]


Epoch 8: training losses= 0.7330708946101367; val_loss= 0.8223690986633301;


 33%|███▎      | 10/30 [1:21:39<2:41:11, 483.55s/it]


Epoch 9: training losses= 0.7307287929579616; val_loss= 0.8242908120155334;


 37%|███▋      | 11/30 [1:29:42<2:33:06, 483.47s/it]


Epoch 10: training losses= 0.730591916712001; val_loss= 0.8224362730979919;


# Chart and Result

In [0]:
def draw_chart(df):
    """Plot the training and validation loss curves against the epoch number.

    Args:
        df: DataFrame with `epoch`, `train_loss` and `val_loss` columns.
    """
    loss_columns = ['train_loss', 'val_loss']
    df.plot(x='epoch', y=loss_columns, kind='line', figsize=(12, 5))
    plt.show()

In [0]:
# Report the lowest validation MSE recorded by the training loop and plot the loss history.
print("Best MSE = ",best_mse)
draw_chart(info_df)