In [1]:
import os
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm, trange
import random
import pandas as pd
from PIL import Image
from pathlib import Path
from collections import OrderedDict
from time import time, ctime, localtime
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset,DataLoader
from torchsummary import summary


import torchvision
import torchvision.transforms as transforms

from torchvision import datasets
from torchvision import models

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split,train_test_split
from sklearn.model_selection import StratifiedKFold,  KFold

torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.cuda.manual_seed_all(0)
np.random.seed(0)
random.seed(0)

In [2]:
# path = "D:/SleepDataHallym/"
path = "/home/nyh/SleepDataSets/"

train_list = glob(path + "train/*")
print(f"number of train dataset is : {train_list.__len__()}")

test_list = glob(path + "test/*")
print(f"number of test dataset is : {test_list.__len__()}")

number of train dataset is : 68
number of test dataset is : 64


In [3]:
import torch 
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
    
device

'cuda'

In [5]:
from itertools import combinations
n = 10
arr = list(range(0,10))
result = []
for i in range(0, n):
    comb = combinations(arr, i)
    for element in [*comb]:
        if element.__len__() != 0 : result.append(element)
         # element.__len__() != 1
# print(result)
    

In [6]:
# path = "D:/SleepDataHallym/"
path = "/home/nyh/SleepDataSets/"

train_list = glob(path + "train/*")
print(f"number of train dataset is : {train_list.__len__()}")

test_list = glob(path + "test/*")
print(f"number of test dataset is : {test_list.__len__()}")

number of train dataset is : 68
number of test dataset is : 64


In [7]:
class CustomTrainDataset(Dataset):
    def read_dataset(self):
        all_signals_files = []
        all_labels = []

        for dataset_folder in self.patient_path:
            
            # print(dataset_folder)
            
            signals_path = dataset_folder + "/"
            signals_list = os.listdir(signals_path)
            # print(signals_list)
            signals_list.sort()
            for signals_filename in signals_list:
                signals_file = signals_path+signals_filename 
                all_signals_files.append(signals_file)
                all_labels.append(int(signals_filename.split('.npy')[0].split('_')[-1]))
                
        return all_signals_files, all_labels, len(all_signals_files)
    
    
    def __init__(self, mode, use_rnn, use_channel = [0,1], transform=None, target_transform=None):
        # self.path = "D:/SleepDataHallym/"
        self.path = "/home/nyh/SleepDataSets/"
        
        if mode == "train": 
            self.patient_path = glob(self.path + "train/*")
            self.patient_list,self.labels,self.length = self.read_dataset()
        elif mode == "test": 
            self.patient_path = glob(self.path + "test/*")
            self.patient_list,self.labels,self.length = self.read_dataset()
        else: return -1

        self.transform = transform
        self.target_transform = target_transform
        self.signal = []
        self.annotation = []
        self.use_rnn = use_rnn
        self.use_channel = use_channel
        
    def __len__(self):
        # print(len(self.patient_list))
        return len(self.patient_list)
    
    def print_line(cnt, folder_name):
        print(f"{cnt}th file name is {folder_name[-6:]} patient collect", end= "\t\t")

        if cnt%3 == 0:
            print()
        
    def __getitem__(self,index):
        
        if self.use_rnn == True:
            folder_name = '/'.join(self.patient_list[index].split('/')[:-1]) + '/'
            folder_list = os.listdir(folder_name)
            folder_list.sort()
            folder_length = len(folder_list)-1
            
            # train/patient_num_OSA/current_epoch
            current_epoch = int(self.patient_list[index].split('/')[-1].split('_')[0]) 
            start_index, end_index = current_epoch - 2, current_epoch + 3             

            labels = int(folder_list[current_epoch].split('.npy')[0].split('_')[-1])
            
            signals = None
            input_signals = None
            
            count = 0
            for i in range(start_index,end_index):
                if i <= 0:
                    c_signals = np.load(folder_name+folder_list[0])
                    c_signals = c_signals[self.use_channel,:]             
                    input_signals = torch.Tensor(c_signals)

                elif i >= folder_length:
                    c_signals = np.load(folder_name+folder_list[folder_length])
                    c_signals = c_signals[self.use_channel,:]
                    input_signals = torch.Tensor(c_signals)
                else:
                    c_signals = np.load(folder_name+folder_list[i])
                    c_signals = c_signals[self.use_channel,:]
                    input_signals = torch.Tensor(c_signals)
                    
                input_signals = input_signals.reshape(1, self.use_channel.__len__(), 6000)
                
                if count == 0:
                    signals = input_signals

                else:
                    # print(signals, type(signals))
                    signals = torch.cat((signals, input_signals), 0)

                count += 1
                        
                # print(signals.shape, labels)
                # print(input_signals.shape)

            return signals, labels

In [8]:
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler # 실습 때 사용하였던 SubsetRandomSampler 사용

random.seed(0)

train_list = CustomTrainDataset(mode = "train", use_rnn = True,  use_channel = [2,3,4])
test_list = CustomTrainDataset(mode = "test", use_rnn = True,  use_channel = [2,3,4])

train_size = 0.8 # train size 0.9 , validation size = 0.1
num_train = train_list.__len__() # train_data의 크기를 저장 
indices = list(range(num_train)) # train_Data 크기에 맞는 인덱스 생성 
np.random.shuffle(indices)   #  인덱스를 섞어야 라벨과 데이터가 안섞임
split = int(np.floor(train_size * num_train)) # 학습 데이터의 비율과 총 데이터의 크기를 곱한다음 np.floor를 통해 버림을 합니다.
train_idx, valid_idx = indices[:split], indices[split:] # 이제 split 된 크기만큼 train과 validation 데이터 셋을 나눠 줍니다. 
 # 이제 나눈 인덱스의 번호로 SubsetSampler를 통해 넣어줍니다
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

In [9]:
train_list.__len__()

49265

In [10]:
trainloader = DataLoader(dataset=train_list,batch_size=64, sampler= train_sampler, num_workers = 2)
validloader = DataLoader(dataset=train_list,batch_size=64, sampler= valid_sampler, num_workers = 2)
testloader = DataLoader(dataset=test_list,batch_size=64, shuffle=True, num_workers = 2)

In [11]:
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


class DeepSleepNet(nn.Module):
    def __init__(self, in_channel = 3, small_Fs = 8, big_Fs = 6, Fs = 200):
        super().__init__()

        
        
        if torch.cuda.is_available(): self.use_gpu = True
        else: self.use_gpu = False
        self.num_directions = 2 #  bidirctionalLSTM 
        self.num_layers = 1
        self.hidden_dim = 512
        


        self.smallCNN = nn.Sequential(
            nn.Conv1d(in_channels = in_channel, out_channels = 64 , kernel_size = Fs//2 , stride = Fs//16, padding = Fs//2//2, bias = False),
            nn.BatchNorm1d(64),
            nn.ReLU(True),

            nn.MaxPool1d(kernel_size = 8, stride = 8,  padding = 4),
            nn.Dropout(0.5),

            nn.Conv1d(in_channels = 64 , out_channels = 128 , kernel_size = small_Fs, 
                        stride = 1, padding= small_Fs//2, bias = False),
            nn.BatchNorm1d(128),
            nn.ReLU(True),

            nn.Conv1d(in_channels = 128 , out_channels = 128 , kernel_size = small_Fs, 
                        stride = 1, padding = small_Fs//2, bias = False),
            nn.BatchNorm1d(128),
            nn.ReLU(True),

            nn.Conv1d(in_channels = 128 , out_channels = 128 , kernel_size = small_Fs, 
                        stride = 1, padding= small_Fs//2, bias = False),
            nn.BatchNorm1d(128),
            nn.ReLU(True),

            nn.MaxPool1d(kernel_size = 4, stride = 4,  padding = 2)
        )


        self.bigCNN = nn.Sequential(
            nn.Conv1d(in_channels = in_channel, out_channels = 64 , kernel_size = Fs*4 , stride = Fs//2, padding = Fs*4//2, bias = False),
            nn.BatchNorm1d(64),
            nn.ReLU(True),

            nn.MaxPool1d(kernel_size = 4, stride = 4,  padding = 2), # padding = filter_size//2
            nn.Dropout(0.5) ,

            nn.Conv1d(in_channels = 64 , out_channels = 128 , kernel_size = big_Fs, stride = 1, padding= big_Fs//2, bias = False),
            nn.BatchNorm1d(128),
            nn.ReLU(True),

            nn.Conv1d(in_channels = 128 , out_channels = 128 , kernel_size = big_Fs, stride = 1, padding= big_Fs//2, bias = False),
            nn.BatchNorm1d(128),
            nn.ReLU(True),

            nn.Conv1d(in_channels = 128 , out_channels = 128 , kernel_size = big_Fs, stride = 1, padding= big_Fs//2, bias = False),
            nn.BatchNorm1d(128),
            nn.ReLU(True),

            nn.MaxPool1d(kernel_size = 2, stride = 2,  padding = 1)
        )

        self.lstm_layer1 = nn.LSTM(1280 + 2176, 512, bidirectional = True)
        self.lstm_layer2 = nn.LSTM(512 * 2, 512, bidirectional = True)
        self.shortcut = nn.Sequential(
            nn.Linear(1280 + 2176, 1024)
        )

        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(1024, 6)  # big and small conv concat     
        )

        self.dropout = nn.Dropout(p = 0.5)

    def init_hidden1(self, order_signal):
        # first = h(hidden) / second = c(cell)
        if self.use_gpu:
            return (
            Variable(torch.zeros(self.num_directions * self.num_layers, order_signal, self.hidden_dim).cuda()),  # hidden
            Variable(torch.zeros(self.num_directions * self.num_layers, order_signal, self.hidden_dim).cuda()))  # cell
        else:
            return (Variable(torch.zeros(self.num_directions * self.num_layers, order_signal, self.hidden_dim)),  # hidden
                    Variable(torch.zeros(self.num_directions * self.num_layers, order_signal, self.hidden_dim)))  # cell


    def forward(self, x):
        lstm_output = None
        order_signal = x.shape[1]
        
        for i in range(order_signal):
            # print(x.shape)
            cnn = x[:, i, :, :]
            # print(cnn.shape)
            small = self.smallCNN(cnn)
            big = self.bigCNN(cnn)

            feature_big = torch.flatten(big, 1)
            feature_small = torch.flatten(small, 1)  # (batch, channel)

            cnn_output = torch.cat((feature_big, feature_small), dim=1)
            
            if i == 0:
                lstm_output = cnn_output
            else:
                lstm_output = torch.cat((lstm_output, cnn_output), 1)
        
        
        batch_size = lstm_output.size(0)
        lstm_output = lstm_output.reshape(batch_size, -1, 1280+2176) # Afer concat, We got [batchsize , 5 * (1280 + 2176)] -> we append 2 startindex and endindex 3

        self.hidden1 = self.init_hidden1(order_signal)
        output, self.hidden1 = self.lstm_layer1(lstm_output, self.hidden1)

        lstm_output = self.dropout(lstm_output)
        lstm_shortcut = self.shortcut(lstm_output)
        
        lstm_output, _ = self.lstm_layer1(lstm_output)
        lstm_output = self.dropout(lstm_output)
        lstm_output, _ = self.lstm_layer2(lstm_output)
        lstm_output = self.dropout(lstm_output)

        lstm_output = torch.add(lstm_output, lstm_shortcut)
        output = self.dropout(lstm_output)
        output = self.fc(output)

        return(output[:, 2, :]) # return middel signal classification


In [12]:
from sched import scheduler
import warnings
warnings.filterwarnings('ignore')


epochs = 100 # 반복 수는 100
batch_size= 64
cnt = 0      # early stopping을 적용하기 위해 만들어놓은 cnt

# train 및 val Loss 저장 
train_loss = torch.zeros(epochs)
val_loss = torch.zeros(epochs)
test_loss = torch.zeros(epochs)

# train 및 val Accuracy 저장 
train_acc = torch.zeros(epochs)
val_acc = torch.zeros(epochs)
test_acc = torch.zeros(epochs)

# 초기 Loss값은 무한대
valid_loss_min = np.Inf

# epochs 수만큼 학습 진행
print("===== Start Learning =====")

model = DeepSleepNet()

if torch.cuda.device_count() > 1: # GPU가 2개 이상이라면
    model = nn.DataParallel(model) # nn.DataParallel을 사용하여 GPU 병렬 처리 진행

model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, betas = [0.9, 0.999]) # 기본적인 Adam 사용 후 lr 및 Optimizier 변경 예정
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

for epoch in tqdm(range(epochs)):
    
    model.train()
    for inputs, labels in tqdm(trainloader, leave=True):    
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad() # optimizer 초기화 -> 모든 gradient를 초기화 시켜줌으로써 이전에 사용했던 기울기에 더해지지않고 새로 구하게 됨
        logits = model.forward(inputs) # validation set인 input값을 넣어서 실행       
        loss = criterion(logits, labels) # criterion은 이전에 정의한 CrossEntropy를 통해 Loss 계산
        loss.backward() # backward를 통해 역전파 실행 계산된 loss를 가지고 모델의 파라미터 개선
        optimizer.step() # optimizer.step()을 통해 개선된 파라미터 적용

        train_loss[epoch] += loss.item() # 에포크당 train_loss 누적

        ps = F.softmax(logits, dim=1) # softmax함수를 통한 정규화 (0 ~ 1) 사이의 확률로 만들어줌
        top_p, top_class = ps.topk(1, dim=1) # topk를 통해 가장 높은 한개를 뽑음

        equals = top_class == labels.reshape(top_class.shape)   # 일치하는지 확인.
        train_acc[epoch] += torch.mean(equals.type(torch.FloatTensor)).item()  # 정확도 계산을 위해 float로 타입 변환 후 mean 계산.

    # Loss의 평균을 구하기
    train_loss[epoch] /= len(trainloader)
    train_acc[epoch] /= len(trainloader)
    
    model.eval()   #dropout Layer와 BatchNormLayer는 eval과정에서 필요하지 않기 때문
    with torch.no_grad():  # validation 과정 no_grad()를 통해 Gradient 계산 안함.
        # validloder에 넣어둔 값 들고오기

        for lists in tqdm(validloader, leave=True):
            inputs, labels = inputs.to(device), labels.to(device)
            # 위에서 train 후 validation 실행. train한번 당 validation 1번 실행.

            logits = model.forward(inputs) # validation set인 input값을 넣어서 실행 
            batch_loss = criterion(logits, labels).detach() # validation set의 Loss 계산
            val_loss[epoch] += batch_loss.item() # Loss 값 누적

            # Calculate accuracy
            ps = F.softmax(logits, dim=1) # 확률값 구하기 (0~1) 사이로 정규화가 됨
            top_p, top_class = ps.topk(1, dim=1) # 가장 높은값 하나를 고르는데 그 값과 idx가져옴
            equals = top_class == labels.view(*top_class.shape) # 일치하는지 확인 
            val_acc[epoch] += torch.mean(equals.type(torch.FloatTensor)).item() # # 정확도 계산을 위해 float로 타입 변환 후 mean 계산.

        # validation Loss 및 accuracy 평균냄
        val_loss[epoch] /= len(validloader)
        val_acc[epoch] /= len(validloader)
    
    ####### Test ######
    with torch.no_grad():  # validation 과정 no_grad()를 통해 Gradient 계산 안함.
        # validloder에 넣어둔 값 들고오기

        for lists in tqdm(testloader, leave=True):
            inputs, labels = inputs.to(device), labels.to(device)
            # 위에서 train 후 validation 실행. train한번 당 validation 1번 실행.


            logits = model.forward(inputs) # validation set인 input값을 넣어서 실행 
            batch_loss = criterion(logits, labels).detach() # validation set의 Loss 계산\
            test_loss[epoch] += batch_loss.item() # Loss 값 누적

            # Calculate accuracy
            ps = F.softmax(logits, dim=1) # 확률값 구하기 (0~1) 사이로 정규화가 됨
            top_p, top_class = ps.topk(1, dim=1) # 가장 높은값 하나를 고르는데 그 값과 idx가져옴
            equals = top_class == labels.view(*top_class.shape) # 일치하는지 확인 
            test_acc[epoch] += torch.mean(equals.type(torch.FloatTensor)).item() # # 정확도 계산을 위해 float로 타입 변환 후 mean 계산.

        # validation Loss 및 accuracy 평균냄
        test_loss[epoch] /= len(testloader)
        test_acc[epoch] /= len(testloader)

    ##################### PRINT LOSS & ACC #####################
    print(f"Epoch {epoch+1}/{epochs}.. "
          f"Train loss: {train_loss[epoch]:.3f}.. "
          f"Train acc: {train_acc[epoch]:.3f}.. "
          f"val loss: {val_loss[epoch]:.3f}.. "
          f"val accuracy: {val_acc[epoch]:.3f}.. "
          f"test loss: {test_loss[epoch]:.3f}.. "
          f"test accuracy: {test_acc[epoch]:.3f}.. "
         )

    ##################### 최적의 모델 저장 #####################
    if val_loss[epoch] <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        val_loss[epoch]))
        torch.save(model.module.state_dict(), 'model_best_channel_234.pt')
        valid_loss_min = val_loss[epoch]

        # 가장 낮은 Loss값을 가지게 된다면 Early Stopping count 초기화
        cnt = 0

    # 20번 이상 Loss 개선이 안된다면 종료
    ############# Early Stopping #############
    if cnt >= 10:
        print("Early Stopping")
        break

    cnt+=1 #Loss 개선 실패
    ########################################################
    scheduler.step()    

===== Start Learning =====


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/616 [00:00<?, ?it/s]

  0%|          | 0/154 [00:00<?, ?it/s]

  0%|          | 0/724 [00:00<?, ?it/s]

Epoch 1/100.. Train loss: 1.132.. Train acc: 0.620.. val loss: 0.723.. val accuracy: 0.769.. test loss: 0.723.. test accuracy: 0.769.. 
Validation loss decreased (inf --> 0.722809).  Saving model ...


  0%|          | 0/616 [00:30<?, ?it/s]

  0%|          | 0/154 [00:00<?, ?it/s]

  0%|          | 0/724 [00:00<?, ?it/s]

KeyboardInterrupt: 