# Import

In [1]:
#import all library
import librosa 
import librosa.display as dsp
from IPython.display import Audio

import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from glob import glob
from PIL import Image
import random
import time

import torch
import torchvision.datasets as datasets  
import torchvision.transforms as transforms  

from torch.utils.data import DataLoader, Dataset 

import torch.optim as optim  
import torch

  from .autonotebook import tqdm as notebook_tqdm


# GPU Setting

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')  
print(device)

cuda


# Make Log/Model Folder

In [3]:
os.makedirs('./img_logs', exist_ok=True) #make logs folder
os.makedirs('./img_model', exist_ok=True) #make model folder

# Fix RandomSeed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(42)

# Hyperparameter Setting

## USER can change options through this line


In [8]:
sr = 8000 #USER OPTION : 8000 / 16000
f_method = 'melspect' #USER OPTION : melspect / mfcc
image_file = 'img_' + f_method + '_' + str(sr) #image file folder name

print(image_file)

img_melspect_8000


In [9]:
#this is class configuration
#you can change hyperparameters here
class CFG:    
    image_dir = image_file #set image file path
    num_epochs = 30 #set the epochs
    batch_size = 10 #set the batch size
    learning_rate = 0.001 #set the learning rate
    kernel_name = image_file #set kernel name for pth
    DEBUG = False #choose debug mode

# Customdataset

In [10]:
transform = transforms.Compose(
        [
            transforms.Resize((224, 224)), #image resize
            transforms.ToTensor(),#transform to tensor
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), #nomarlize average,variance as 0.5 
        ]
    )

In [11]:
#custom dataset
class CustomDataset(Dataset):
    def __init__(self, root_dir, csv_path, train_mode = True, transform=transform):
        self.root_dir = root_dir
        self.csv_path = pd.read_csv(csv_path)
        self.train_mode = train_mode
        self.transform = transform

    def __len__(self): 
        return len(self.csv_path)

    def __getitem__(self, index):
        img_id = self.csv_path.iloc[index, 1][:-4]+'.jpg' 
        img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")

        y_label = torch.tensor(self.csv_path.iloc[index, -2]) 

        if self.transform is not None:
            img = self.transform(img)

        return (torch.tensor(img), y_label)

# Dataloader

In [12]:
os.getcwd() #check current path

'C:\\Users\\lab\\Desktop\\yj\\FINAL_TEST_CODE\\code\\deep_learning'

In [13]:
train_dataset = CustomDataset(f'../../dataset/image/{image_file}', '../../dataset/train.csv', train_mode = True, transform = transform) 
train_loader = DataLoader(train_dataset, batch_size = CFG.batch_size, shuffle=True, num_workers=0)

valid_dataset = CustomDataset(f'../../dataset/image/{image_file}', '../../dataset/valid.csv', train_mode = False, transform = transform)
valid_loader = DataLoader(valid_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=0)

In [14]:
train_batches = len(train_loader)
valid_batches = len(valid_dataset)

print('total train batches :', train_batches)
print('total valid batches :', valid_batches)

total train batches : 230
total valid batches : 766


# Model Define

In [15]:
from tqdm.auto import tqdm
import torch.nn as nn  

class CNNclassification(torch.nn.Module):
    def __init__(self):
        super(CNNclassification, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(3, 200, kernel_size=3, stride=1, padding=1),  
            nn.ReLU(),  
            nn.MaxPool2d(kernel_size=3, stride=2)) 
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(200, 300, kernel_size=3, stride=1, padding=1), 
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size=2, stride=2)) 
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(300, 400, kernel_size=3, stride=1, padding=1), 
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size=2, stride=2)) 
          
        self.fc_layer = nn.Sequential( 
            nn.Linear(291600 , 2)  
        )    
        
    def forward(self, x):
        
        x = self.layer1(x)  
        
        x = self.layer2(x) 

        x = self.layer3(x) 
    
        x = torch.flatten(x, start_dim=1)  
        
        out = self.fc_layer(x)
        return out


In [16]:
model = CNNclassification().to(device)
print(model)

CNNclassification(
  (layer1): Sequential(
    (0): Conv2d(3, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(200, 300, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(300, 400, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=291600, out_features=2, bias=True)
  )
)


# Train/Validation

In [19]:
model = CNNclassification().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device) 
optimizer = optim.Adam(model.parameters(), lr = CFG.learning_rate)
scheduler = None  

In [20]:
from tqdm.auto import tqdm
def train(model, optimizer, train_loader, scheduler, device): 
    n = len(train_loader)
    best_loss = np.inf
    
    for epoch in range(1, CFG.num_epochs): 
        model.train() 
        running_loss = 0.0        
        
        for wav, label in tqdm(iter(train_loader)):
            wav, label = wav.to(device), label.to(device)  
            optimizer.zero_grad()       
            logit = model(wav)  
            loss = criterion(logit, label)  
             
            loss.backward()  
            optimizer.step()  
            running_loss += loss.item()
        print(f'-----  Epoch {epoch} -----')     
        print('Train loss: %.10f' %(running_loss / len(train_loader))) 
                   
        model.eval()  
        valid_loss = 0.0
        correct = 0

        real_list = []
        pred_list = []
       
        with torch.no_grad(): 
            for wav, label in tqdm(iter(valid_loader)):
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                valid_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=False)  
                correct += pred.eq(label.view_as(pred)).sum().item()  

                real_list.extend(label.tolist())
                pred_list.extend(pred.tolist())

        valid_acc = 100 * correct / len(valid_loader.dataset)
        valid_loss_total = valid_loss / len(valid_loader)
        print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(valid_loss / len(valid_loader), correct, len(valid_loader.dataset), 100 * correct / len(valid_loader.dataset)))
    
        content = time.ctime() + ' ' + f'Epoch {CFG.num_epochs}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {(running_loss / len(train_loader)):.5f}, valid loss: {valid_loss / len(valid_loader):.5f}, Accuracy : {correct}/{len(valid_loader.dataset)} ({100 * correct / len(valid_loader.dataset)}) %'

        with open(os.path.join('./img_logs', f'log_{CFG.kernel_name}.txt'), 'a') as appender:
            appender.write(content + '\n')
            

        if best_loss > valid_loss_total:
            best_loss = valid_loss_total
            torch.save(model.state_dict(), f'./img_model/{CFG.kernel_name}.pth')
            print('Model Saved.')

# Run

In [21]:
train(model, optimizer, train_loader, scheduler, device)

100%|██████████| 230/230 [00:24<00:00,  9.52it/s]


-----  Epoch 1 -----
Train loss: 0.7284999454


100%|██████████| 77/77 [00:04<00:00, 18.90it/s]


Vail set: Loss: 0.4239, Accuracy: 628/766 (82%)

Model Saved.


100%|██████████| 230/230 [00:16<00:00, 14.31it/s]


-----  Epoch 2 -----
Train loss: 0.4194293650


100%|██████████| 77/77 [00:03<00:00, 22.22it/s]


Vail set: Loss: 0.4275, Accuracy: 602/766 (79%)



100%|██████████| 230/230 [00:16<00:00, 13.77it/s]


-----  Epoch 3 -----
Train loss: 0.3400109018


100%|██████████| 77/77 [00:03<00:00, 21.58it/s]


Vail set: Loss: 0.4093, Accuracy: 623/766 (81%)

Model Saved.


100%|██████████| 230/230 [00:16<00:00, 13.97it/s]


-----  Epoch 4 -----
Train loss: 0.3030286299


100%|██████████| 77/77 [00:03<00:00, 21.52it/s]


Vail set: Loss: 0.4763, Accuracy: 614/766 (80%)



100%|██████████| 230/230 [00:16<00:00, 13.86it/s]


-----  Epoch 5 -----
Train loss: 0.2281906155


100%|██████████| 77/77 [00:03<00:00, 21.09it/s]


Vail set: Loss: 0.4166, Accuracy: 631/766 (82%)



100%|██████████| 230/230 [00:17<00:00, 13.16it/s]


-----  Epoch 6 -----
Train loss: 0.1693647125


100%|██████████| 77/77 [00:03<00:00, 21.86it/s]


Vail set: Loss: 0.4878, Accuracy: 637/766 (83%)



100%|██████████| 230/230 [00:17<00:00, 13.33it/s]


-----  Epoch 7 -----
Train loss: 0.0911816515


100%|██████████| 77/77 [00:03<00:00, 21.31it/s]


Vail set: Loss: 0.6348, Accuracy: 623/766 (81%)



100%|██████████| 230/230 [00:16<00:00, 13.82it/s]


-----  Epoch 8 -----
Train loss: 0.0856885360


100%|██████████| 77/77 [00:03<00:00, 21.49it/s]


Vail set: Loss: 0.6466, Accuracy: 628/766 (82%)



100%|██████████| 230/230 [00:16<00:00, 13.88it/s]


-----  Epoch 9 -----
Train loss: 0.0404726614


100%|██████████| 77/77 [00:03<00:00, 22.10it/s]


Vail set: Loss: 0.9675, Accuracy: 617/766 (81%)



100%|██████████| 230/230 [00:17<00:00, 13.33it/s]


-----  Epoch 10 -----
Train loss: 0.0584576585


100%|██████████| 77/77 [00:03<00:00, 20.35it/s]


Vail set: Loss: 0.9223, Accuracy: 617/766 (81%)



100%|██████████| 230/230 [00:16<00:00, 13.87it/s]


-----  Epoch 11 -----
Train loss: 0.0245337265


100%|██████████| 77/77 [00:03<00:00, 20.32it/s]


Vail set: Loss: 1.3488, Accuracy: 612/766 (80%)



100%|██████████| 230/230 [00:16<00:00, 14.04it/s]


-----  Epoch 12 -----
Train loss: 0.0430176108


100%|██████████| 77/77 [00:03<00:00, 21.98it/s]


Vail set: Loss: 0.9661, Accuracy: 636/766 (83%)



100%|██████████| 230/230 [00:16<00:00, 14.21it/s]


-----  Epoch 13 -----
Train loss: 0.0345450394


100%|██████████| 77/77 [00:03<00:00, 22.22it/s]


Vail set: Loss: 1.1251, Accuracy: 614/766 (80%)



100%|██████████| 230/230 [00:16<00:00, 14.26it/s]


-----  Epoch 14 -----
Train loss: 0.0029864506


100%|██████████| 77/77 [00:03<00:00, 22.20it/s]


Vail set: Loss: 1.2673, Accuracy: 623/766 (81%)



100%|██████████| 230/230 [00:15<00:00, 14.52it/s]


-----  Epoch 15 -----
Train loss: 0.0008166338


100%|██████████| 77/77 [00:03<00:00, 21.47it/s]


Vail set: Loss: 1.3011, Accuracy: 627/766 (82%)



100%|██████████| 230/230 [00:16<00:00, 14.07it/s]


-----  Epoch 16 -----
Train loss: 0.0003202737


100%|██████████| 77/77 [00:03<00:00, 22.15it/s]


Vail set: Loss: 1.3407, Accuracy: 629/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.40it/s]


-----  Epoch 17 -----
Train loss: 0.0002168532


100%|██████████| 77/77 [00:03<00:00, 22.02it/s]


Vail set: Loss: 1.3740, Accuracy: 630/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.42it/s]


-----  Epoch 18 -----
Train loss: 0.0001579356


100%|██████████| 77/77 [00:03<00:00, 22.03it/s]


Vail set: Loss: 1.4199, Accuracy: 627/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.39it/s]


-----  Epoch 19 -----
Train loss: 0.0001231718


100%|██████████| 77/77 [00:03<00:00, 22.29it/s]


Vail set: Loss: 1.4517, Accuracy: 626/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.41it/s]


-----  Epoch 20 -----
Train loss: 0.0000982800


100%|██████████| 77/77 [00:03<00:00, 22.07it/s]


Vail set: Loss: 1.4788, Accuracy: 630/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.44it/s]


-----  Epoch 21 -----
Train loss: 0.0000781168


100%|██████████| 77/77 [00:03<00:00, 22.27it/s]


Vail set: Loss: 1.5067, Accuracy: 630/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.47it/s]


-----  Epoch 22 -----
Train loss: 0.0000641525


100%|██████████| 77/77 [00:03<00:00, 21.95it/s]


Vail set: Loss: 1.5353, Accuracy: 629/766 (82%)



100%|██████████| 230/230 [00:17<00:00, 13.51it/s]


-----  Epoch 23 -----
Train loss: 0.0000528001


100%|██████████| 77/77 [00:03<00:00, 20.49it/s]


Vail set: Loss: 1.5605, Accuracy: 629/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.38it/s]


-----  Epoch 24 -----
Train loss: 0.0000437897


100%|██████████| 77/77 [00:03<00:00, 22.22it/s]


Vail set: Loss: 1.5839, Accuracy: 628/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.40it/s]


-----  Epoch 25 -----
Train loss: 0.0000362890


100%|██████████| 77/77 [00:03<00:00, 22.10it/s]


Vail set: Loss: 1.6080, Accuracy: 628/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.42it/s]


-----  Epoch 26 -----
Train loss: 0.0000307380


100%|██████████| 77/77 [00:03<00:00, 21.96it/s]


Vail set: Loss: 1.6323, Accuracy: 626/766 (82%)



100%|██████████| 230/230 [00:15<00:00, 14.42it/s]


-----  Epoch 27 -----
Train loss: 0.0000260118


100%|██████████| 77/77 [00:03<00:00, 21.09it/s]


Vail set: Loss: 1.6579, Accuracy: 627/766 (82%)



100%|██████████| 230/230 [00:17<00:00, 13.36it/s]


-----  Epoch 28 -----
Train loss: 0.0000220681


100%|██████████| 77/77 [00:03<00:00, 21.61it/s]


Vail set: Loss: 1.6786, Accuracy: 628/766 (82%)



100%|██████████| 230/230 [00:16<00:00, 13.60it/s]


-----  Epoch 29 -----
Train loss: 0.0000187572


100%|██████████| 77/77 [00:03<00:00, 22.08it/s]

Vail set: Loss: 1.6994, Accuracy: 629/766 (82%)






# Test

In [22]:
os.getcwd()

'C:\\Users\\lab\\Desktop\\yj\\FINAL_TEST_CODE\\code\\deep_learning'

In [23]:
best_model = CNNclassification().to(device)
best_model.load_state_dict(torch.load(f'./img_model/{CFG.kernel_name}.pth')) 

test_dataset = CustomDataset(f'../../dataset/image/{image_file}', '../../dataset/test.csv', train_mode = False, transform = transform)
test_loader = DataLoader(test_dataset, batch_size = CFG.batch_size, shuffle=False, num_workers=0)

In [24]:
from sklearn.metrics import classification_report
import torch.nn.functional as F
from sklearn.metrics import log_loss
from sklearn import metrics 
import math

best_model.eval()
test_loss = 0.0
correct = 0

real_list = []
pred_list = []
prob_list = []

with torch.no_grad(): 
    for wav, label in tqdm(iter(test_loader)):
        
        wav, label = wav.to(device), label.to(device)
        logit = best_model(wav)
        test_loss+=criterion(logit, label)
        prob = F.sigmoid(logit[:,1])
        prob_list.extend(prob.cpu().tolist())
        pred = logit.argmax(dim=1, keepdim=False)  
        correct += pred.eq(label.view_as(pred)).sum().item()  
        real_list.extend(label.cpu().tolist())
        pred_list.extend(pred.cpu().tolist())


100%|██████████| 77/77 [00:03<00:00, 21.32it/s]


# Evaluation

In [25]:
test_acc = 100 * correct / len(test_loader.dataset)
test_loss_total = test_loss / len(test_loader) 

print(f'Test Loss: {test_loss_total:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({test_acc:.4f}%)\n')

print(classification_report(real_list, pred_list))

fpr, tpr, thresholds = metrics.roc_curve(real_list, prob_list)
AUC = metrics.auc(fpr, tpr)

CEL = log_loss(real_list, prob_list)

print(f'CEL : {CEL:.4f}')
print(f'AUC : {AUC:.4f}')

Test Loss: 0.4002, Accuracy: 627/766 (81.8538%)

              precision    recall  f1-score   support

           0       0.91      0.69      0.79       369
           1       0.77      0.93      0.84       397

    accuracy                           0.82       766
   macro avg       0.84      0.81      0.81       766
weighted avg       0.83      0.82      0.82       766

CEL : 0.3872
AUC : 0.9208
