In [1]:
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torchvision import transforms
import matplotlib.pyplot as plt
import torch
import random
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
import pickle
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm
from datetime import datetime
import gc
gc.collect()
torch.cuda.empty_cache()

random_seed = 1234
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)  # if use multi-GPU
#torch.backends.cudnn.deterministic = True
#torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ['CUDA_VISIBLE_DEVICES'] = '1, 2' # '0'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda')
print(device)

final_dict = {}


cuda


# Data 

In [2]:
X_train_ = torch.load('data/Training/X_train_diet.pt') # torch.Size([16008, 240, 320, 1]) # X_train_diet.pt # X_train.pt
X_test_ = torch.load('data/Testing/X_test_diet.pt') # torch.Size([1596, 240, 320, 1]) # X_test_diet.pt # X_test.pt

Y_train_ = pd.read_csv('data/Training/Y_train.csv')
Y_test_ = pd.read_csv('data/Testing/Y_test.csv')
# dataframe to tensor
Y_train_ = torch.tensor(Y_train_.values)  # torch.Size([16008, 63])
Y_test_ = torch.tensor(Y_test_.values)  # torch.Size([1596, 63])

In [3]:
X_train_ = X_train_.float()
X_test_ = X_test_.float()

mean = 1881.42
std = 12.29

# Standardise
X_train_ -= mean
X_train_ /= std
X_test_ -= mean
X_test_ /= std

In [4]:
# permute data

X_train_ = X_train_.permute([0, 3, 1, 2])
X_train_.shape

X_test_ = X_test_.permute([0, 3, 1, 2])
X_test_.shape

# torch.Size([1596, 1, 240, 320])


torch.Size([1596, 1, 240, 320])

In [5]:
def sample_and_split_dataloader(X_train_, X_test_, Y_train_, Y_test_, sample_size, valid_size, batch_size):

    # sample train set
    n_train = len(X_train_)
    indices = list(range(n_train))
    np.random.shuffle(indices)
    split = int(np.floor(sample_size * n_train))
    sample_indices = indices[:split]
    X_train_ = X_train_[sample_indices]
    Y_train_ = Y_train_[sample_indices]

    # # sample test set
    # n_test = len(X_test_)
    # indices = list(range(n_test))
    # np.random.shuffle(indices)
    # split = int(np.floor(sample_size * n_test))
    # sample_indices = indices[:split]
    # X_test_ = X_test_[sample_indices]
    # Y_test_ = Y_test_[sample_indices]

    # split train, valid set
    n_train = len(X_train_)                  
    indices = list(range(n_train))
    np.random.shuffle(indices)

    split = int(np.floor(valid_size * n_train)) 
    train_idx, valid_idx = indices[split:], indices[:split]

    train_set = TensorDataset(X_train_, Y_train_)
    train_sampler, valid_sampler = SubsetRandomSampler(train_idx), SubsetRandomSampler(valid_idx) 
    train_loader = DataLoader(train_set, batch_size=batch_size, sampler=train_sampler)
    valid_loader = DataLoader(train_set, batch_size=batch_size, sampler=valid_sampler)

    # test loader
    test_set = TensorDataset(X_test_, Y_test_)
    test_loader = DataLoader(test_set, batch_size=1)

    # print shape
    print('\n Sample size: ', sample_size)
    print('train_loader: ', len(train_loader)*train_loader.batch_size )
    print('valid_loader: ', len(valid_loader)*valid_loader.batch_size )
    print('test_loader: ', len(test_loader)*test_loader.batch_size )

    return train_loader, valid_loader, test_loader


# Model

In [6]:
class my_CNN(nn.Module): 

    def __init__(self, n_class, CHANNELS, KERNEL_SIZE, STRIDE, PADDING):
        # super(my_CNN, self).__init__()
        super().__init__()
        self.n_class = n_class
        self.cs = CHANNELS
        self.KERNEL_SIZE = KERNEL_SIZE
        self.STRIDE = STRIDE
        self.PADDING = PADDING
        # self.dropout - nn.Dropout(0.4)

        self.layer1 = nn.Sequential(
            nn.Conv2d(1, self.cs[0], kernel_size=KERNEL_SIZE, stride=STRIDE, padding=PADDING), # 240 * 320 * self.cs[0]
            nn.BatchNorm2d(self.cs[0]),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2) # 120 * 160 * self.cs[0]
        ) 
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(self.cs[0], self.cs[1], kernel_size=KERNEL_SIZE, stride=STRIDE, padding=PADDING), # 120 * 160 * self.cs[1]
            nn.BatchNorm2d(self.cs[1]),
            nn.ReLU()
            # nn.MaxPool2d(kernel_size=2, stride=2) # 60 * 80 * self.cs[1]
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(self.cs[1], self.cs[2], kernel_size=KERNEL_SIZE, stride=STRIDE, padding=PADDING), # 120 * 160 * self.cs[2]
            nn.BatchNorm2d(self.cs[2]),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2)  # 60 * 80 * self.cs[2]
        )
        
        self.fc = None

    def forward(self, x):
        
        out1 = self.layer1(x) 
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = out3.reshape(out3.size(0), -1) 
        ###
        shape1, shape2 = out3.shape[2], out3.shape[3]
        self.fc = nn.Linear(shape1 * shape2 * self.cs[2], self.n_class).to(device)
        ###
        out4 = out4.to(device)
        out = self.fc(out4)  #######################################################

        # print(x.shape)  # torch.Size([1, 1, 240, 320])
        # print(out1.shape)  # torch.Size([1, 16, 120, 160])
        # print(out2.shape)  # torch.Size([1, 32, 120, 160])
        # print(out3.shape)  # torch.Size([1, 64, 60, 80])
        # print(out4.shape) # torch.Size([1, 307200])

        return out


# MatMul shape 맞는지 체크

# a = my_CNN(n_class=63, KERNEL_SIZE=KERNEL_SIZE, STRIDE=STRIDE, PADDING=PADDING)
# a(torch.randn(1, 1, 240, 320))  # B C H W 로 가짜 데이터 넣어보고 체크


In [7]:
def train(model):
    
    optimizer = torch.optim.SGD(model.parameters(), lr = LR)
    criterion = nn.MSELoss()
    valid_loss_min = np.inf # 초기화 (나중에 업데이트 함)
    records = {}

    for epoch in tqdm(range(1, Train_epoch + 1)):

        train_loss = 0.0
        valid_loss = 0.0

        for batch_id, (image, label) in enumerate(train_loader): # iter: batch 데이터 (25개) 

            label, image = label.to(device), image.float().to(device) # shape: (25,)
            output = model(image)   # 1. 모델에 데이터 입력해 출력 얻기 # 10개 클래스에 대한 로짓 # shape: (25, 10)
            loss = criterion(output.float(), label.float()) # 2. loss 계산 
            train_loss += loss.item()
            
            optimizer.zero_grad() # 3. 기울기 초기화 (iter 끝날때마다 초기화)
            loss.backward() # 4. 역전파
            optimizer.step() # 5. 최적화
        
        for batch_id, (image, label) in enumerate(valid_loader):

            label, image = label.to(device), image.float().to(device)
            output = model(image)
            loss = criterion(output.float(), label.float())
            valid_loss += loss.item()
        
        # calculate avg losses
        train_loss = train_loss/len(train_loader.dataset)
        valid_loss = valid_loss/len(valid_loader.dataset)

        # print training/validation records 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, train_loss, valid_loss))
        # save training/validation records 
        records[f'epoch_{epoch}'] = [train_loss, valid_loss]
        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min, valid_loss))
            torch.save(
                model, 
                f'./records/CNN_best_model_kernel_{model.KERNEL_SIZE}_stride_{model.STRIDE}.pt')
            torch.save(
                model.state_dict(), 
                f'./records/CNN_best_model_kernel_{model.KERNEL_SIZE}_stride_{model.STRIDE}.pth')
            valid_loss_min = valid_loss
    # save records
    pd.DataFrame(records).to_csv(
        f'./records/CNN_training_kernel_{model.KERNEL_SIZE}_stride_{model.STRIDE}.csv', index=False)
    
    return model


def test(model):

    print('success load best_model')
    criterion = nn.MSELoss()

    test_loss = 0.0
    with torch.no_grad():  # 파라미터 업데이트 안 함

        for batch_id, (image, label) in enumerate(tqdm(test_loader)):

            label, image = label.to(device), image.float().to(device)
            output = model(image)
            loss = criterion(output.float(), label.float())
            test_loss += loss.item()

    # calculate avg losses
    test_loss = test_loss/len(test_loader.dataset)

    return test_loss


# Main 

In [8]:
# training parameters

n_class = 63
Train_epoch = 30
BATCH_SIZE = 128
LR = 5e-2

# ablation studies

batch_norm = None
activation = None
max_pooling = None


In [9]:
acc_runtime_dict = {}

# hyperparameters
param_grid = {
    'CHANNELS': [[3, 6, 9]],
    'KERNEL_SIZE' : [3, 6],                  ###
    'STRIDE' : [1, 5],                       ###
    'PADDING' : [0]                          
}

grid_list = list(ParameterGrid(param_grid))

In [10]:
if __name__ == '__main__':
    
    sample_size_list = [0.3, 0.6, 1.0]

    for sample_size in tqdm(sample_size_list):

        train_loader, valid_loader, test_loader = sample_and_split_dataloader(
            X_train_, X_test_, Y_train_, Y_test_, sample_size=sample_size, valid_size=0.2, batch_size=BATCH_SIZE)


        for case in tqdm(grid_list):

            # try:
            regr = my_CNN(n_class=63, CHANNELS=case['CHANNELS'], KERNEL_SIZE=case['KERNEL_SIZE'],
                          STRIDE=case['STRIDE'], PADDING=case['PADDING']
                    ).to(device)

            start_time = datetime.now()
            model_trained = train(regr)
            train_time = datetime.now() - start_time

            acc_runtime_dict[str(case)] = dict({'train_time': train_time})
            # except:
            #     pass

        final_dict[sample_size] = acc_runtime_dict


  0%|          | 0/3 [00:00<?, ?it/s]


 Sample size:  0.3
train_loader:  3968
valid_loader:  1024
test_loader:  1596



[A

Epoch: 1 	Training Loss: 0.000899 	Validation Loss: 0.000170
Validation loss decreased (inf --> 0.000170).  Saving model ...



[A

Epoch: 2 	Training Loss: 0.000658 	Validation Loss: 0.000150
Validation loss decreased (0.000170 --> 0.000150).  Saving model ...



[A

Epoch: 3 	Training Loss: 0.000556 	Validation Loss: 0.000144
Validation loss decreased (0.000150 --> 0.000144).  Saving model ...



[A

Epoch: 4 	Training Loss: 0.000507 	Validation Loss: 0.000120
Validation loss decreased (0.000144 --> 0.000120).  Saving model ...



[A
[A

Epoch: 5 	Training Loss: 0.000443 	Validation Loss: 0.000120
Epoch: 6 	Training Loss: 0.000422 	Validation Loss: 0.000099
Validation loss decreased (0.000120 --> 0.000099).  Saving model ...



[A

Epoch: 7 	Training Loss: 0.000383 	Validation Loss: 0.000094
Validation loss decreased (0.000099 --> 0.000094).  Saving model ...



[A

Epoch: 8 	Training Loss: 0.000352 	Validation Loss: 0.000088
Validation loss decreased (0.000094 --> 0.000088).  Saving model ...



[A

Epoch: 9 	Training Loss: 0.000339 	Validation Loss: 0.000087
Validation loss decreased (0.000088 --> 0.000087).  Saving model ...



[A

Epoch: 10 	Training Loss: 0.000321 	Validation Loss: 0.000083
Validation loss decreased (0.000087 --> 0.000083).  Saving model ...



[A

Epoch: 11 	Training Loss: 0.000308 	Validation Loss: 0.000080
Validation loss decreased (0.000083 --> 0.000080).  Saving model ...



[A

Epoch: 12 	Training Loss: 0.000299 	Validation Loss: 0.000078
Validation loss decreased (0.000080 --> 0.000078).  Saving model ...



[A

Epoch: 13 	Training Loss: 0.000297 	Validation Loss: 0.000074
Validation loss decreased (0.000078 --> 0.000074).  Saving model ...



[A

Epoch: 14 	Training Loss: 0.000279 	Validation Loss: 0.000071
Validation loss decreased (0.000074 --> 0.000071).  Saving model ...



[A

Epoch: 15 	Training Loss: 0.000283 	Validation Loss: 0.000070
Validation loss decreased (0.000071 --> 0.000070).  Saving model ...



[A
[A

Epoch: 16 	Training Loss: 0.000274 	Validation Loss: 0.000072


In [None]:
# save
with open('result_CNN.pickle', 'wb') as f:
    pickle.dump(final_dict, f, pickle.HIGHEST_PROTOCOL)

# load
with open('result_CNN.pickle', 'rb') as f:
    result_RF = pickle.load(f)


# test는 따로 best model을 torch.load로 가져와서 구하기

In [None]:
best_model = torch.load('./records/CNN_best_model_kernel_3_stride_1.pt')
test_loss = test(best_model)

success load best_model


100%|██████████| 1596/1596 [07:43<00:00,  3.45it/s]


# training 시각화 

In [None]:
pd.read_csv('./records/CNN_training_kernel_3_stride_1.csv')

Unnamed: 0,epoch_1
0,0.0
1,7.5e-05


In [None]:
test_loss
