## Import

In [5]:
import torch
import numpy as np
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchsummary import summary
from PIL import Image
import os
import re
import nibabel as nib

## Data Load

In [6]:
import zipfile
zip_file = zipfile.ZipFile('/content/drive/MyDrive/chest_xray.zip')
zip_file.extractall('/content/datasets')
zip_file.close()

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
PATH = '/content/datasets/chest_xray/'

from glob import glob

normal_train_data = glob(PATH + 'train/NORMAL/*.jpeg')
pneumonia_train_data = glob(PATH + 'train/PNEUMONIA/*.jpeg')
normal_val_data = glob(PATH + 'val/NORMAL/*.jpeg')
pneumonia_val_data = glob(PATH + 'val/PNEUMONIA/*.jpeg')
normal_test_data = glob(PATH + 'test/NORMAL/*.jpeg')
pneumonia_test_data = glob(PATH + 'test/PNEUMONIA/*.jpeg')

In [9]:
train_path  = normal_train_data + pneumonia_train_data
val_path = normal_val_data + pneumonia_val_data
test_path = normal_test_data + pneumonia_test_data

## Data Load

In [10]:
classes = ['NORMAL','PNEUMONIA']

In [11]:
class2idx = {cls:idx for idx, cls in enumerate(classes)}
class2idx

{'NORMAL': 0, 'PNEUMONIA': 1}

In [12]:
class Train_Dataset(Dataset):
    def __init__(self, data_path,transform = None):
        self.data_path = data_path
        self.transform = transform
    def __len__(self):
        return len(self.data_path)
    def __getitem__(self,idx):
        path = self.data_path[idx]
        img = Image.open(path)
        img = np.squeeze(np.array(img.resize((512,512))))
        img = img/255
        if len(img.shape) == 2:
          img = img[:,:,np.newaxis]
          img = np.concatenate((img,img,img), axis = -1)
        else:
          pass
        label = path.split('/')[-2]
        label = class2idx[label]
        if self.transform is not None:
            transformed = self.transform(image=img)
            image = transformed['image']
        return image, label

In [13]:
class val_Dataset(Dataset):
    def __init__(self, data_path,transform = None):
        self.data_path = data_path
        self.transform = transform
    def __len__(self):
        return len(self.data_path)
    def __getitem__(self,idx):
        path = self.data_path[idx]
        img = Image.open(path)
        img = np.squeeze(np.array(img.resize((512,512))))
        img = img/255
        if len(img.shape) == 2:
          img = img[:,:,np.newaxis]
          img = np.concatenate((img,img,img), axis = -1)
        else:
          pass
        label = path.split('/')[-2]
        label = class2idx[label]
        if self.transform is not None:
            transformed = self.transform(image=img)
            image = transformed['image']
        return image, label

In [14]:
class test_Dataset(Dataset):
    def __init__(self, data_path,transform = None):
        self.data_path = data_path
        self.transform = transform
    def __len__(self):
        return len(self.data_path)
    def __getitem__(self,idx):
        path = self.data_path[idx]
        img = Image.open(path)
        img = np.squeeze(np.array(img.resize((512,512))))
        img = img/255
        if len(img.shape) == 2:
          img = img[:,:,np.newaxis]
          img = np.concatenate((img,img,img), axis = -1)
        else:
          pass
        label = path.split('/')[-2]
        label = class2idx[label]
        if self.transform is not None:
            transformed = self.transform(image=img)
            image = transformed['image']
        return image, label

In [15]:
#!pip install --upgrade --force-reinstall --no-deps albumentations
!pip install albumentations==0.4.6

Collecting albumentations==0.4.6
  Downloading albumentations-0.4.6.tar.gz (117 kB)
[K     |████████████████████████████████| 117 kB 18.4 MB/s 
Collecting imgaug>=0.4.0
  Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
[K     |████████████████████████████████| 948 kB 52.7 MB/s 
Building wheels for collected packages: albumentations
  Building wheel for albumentations (setup.py) ... [?25l[?25hdone
  Created wheel for albumentations: filename=albumentations-0.4.6-py3-none-any.whl size=65174 sha256=6b234ca1b4664b5c695fb8d3d9344d28077d7e3ea8d59f6cd6525b7c1c417c52
  Stored in directory: /root/.cache/pip/wheels/cf/34/0f/cb2a5f93561a181a4bcc84847ad6aaceea8b5a3127469616cc
Successfully built albumentations
Installing collected packages: imgaug, albumentations
  Attempting uninstall: imgaug
    Found existing installation: imgaug 0.2.9
    Uninstalling imgaug-0.2.9:
      Successfully uninstalled imgaug-0.2.9
  Attempting uninstall: albumentations
    Found existing installation: albu

In [16]:
import albumentations as A                                                                           
from albumentations.pytorch import ToTensorV2

In [17]:
data_transforms = {
    'train': A.Compose(
    [
      ToTensorV2()
     ]
    ),
    'val': A.Compose(
        [
     ToTensorV2()
                            ]
                           ),
    'test' : A.Compose(
        [
         ToTensorV2()
        ]
    )
}

In [18]:
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]

# Number of classes in the dataset
num_classes = 1

# Batch size for training (change depending on how much memory you have)
batch_size = 2

# Number of epochs to train for
num_epochs = 10

In [19]:
train_data = Train_Dataset(train_path,transform = data_transforms['train'])
val_data = val_Dataset(val_path,transform =  data_transforms['val'])
test_data=  test_Dataset(test_path , transform =  data_transforms['test'])

In [20]:
image_datasets = {'train' : train_data , 'val' : val_data , 'test' : test_data}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True) for x in ['train', 'val' , 'test']}
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Model

In [21]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [22]:
class Net(nn.Module):
    def __init__(self,number_classes):
        super(Net, self).__init__()
        self.classes = number_classes
        self.conv = nn.Sequential(
            #3 224 128
            nn.Conv2d(3, 64, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(64, 64, 3, padding=1),nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            #64 112 64
            nn.Conv2d(64, 128, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(128, 128, 3, padding=1),nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            #128 56 32
            nn.Conv2d(128, 256, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(256, 256, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(256, 256, 3, padding=1),nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            #256 28 16
            nn.Conv2d(256, 512, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1),nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            #512 14 8
            nn.Conv2d(512, 512, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1),nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1),nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2)
        )
        #512 7 4

        self.avg_pool = nn.AvgPool2d(7)
        #512 2 2
        self.fc1 = nn.Linear(2048,512)
        self.fc2 = nn.Linear(512,256)
        self.fc3 = nn.Linear(256, self.classes)

    def forward(self, x):
        #print(x.size())
        features = self.conv(x)
        #print(features.size())
        x = self.avg_pool(features)
        #print(x.size())
        x = x.view(features.size(0), -1)
        #print(x.size())
        x = self.fc1(x)
        x= nn.ReLU(inplace=True)(x)
        x = self.fc2(x)
        x= nn.ReLU(inplace=True)(x)
        x = self.fc3(x)        
        #x = self.softmax(x)
        return x

In [23]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score , roc_curve

In [65]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    since = time.time()
    train_acc_history = []
    train_loss_hist = [] 
    train_precision = [] 
    train_recall = [] 
    train_auc = [] 
    train_f1_score =[]
    val_loss_hist = []
    val_acc_history = [] 
    val_precision = [] 
    val_recall = [] 
    val_auc = [] 
    val_f1_score = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            start = time.time()
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            running_precision =0
            running_recall = 0
            running_f1_score =0 
            running_auc = 0
            # Iterate over data.
            full_label = []
            full_output = []
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device = device, dtype = torch.float32)
                labels = labels.to(device = device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    outputs = model(inputs)
                    loss = criterion(torch.squeeze(outputs,dim = -1),labels.float())
                    pred = nn.Sigmoid()(outputs)
                    pred[pred <0.5] = 0
                    pred[pred >=0.5] = 1
                    if phase == 'train':
                    # backward + optimize only if in training phase
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item() * inputs.size(0)
                full_label+=list(torch.squeeze(labels,dim= -1).detach().cpu().numpy())
                full_output+=list(pred.detach().cpu().numpy())
                running_corrects += torch.sum(torch.squeeze(pred) == labels.data)
                '''
                running_precision += precision_score(labels , outputs)
                running_recall += recall_score(labels , outputs)
                running_auc += roc_auc_score(labels , outputs)
                running_f1_score += f1_score(labels.detach().cpu().numpy(), pred , average = 'macro')
                '''

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            epoch_f1_score = f1_score(full_label, full_output)
            epoch_precision = recall_score(full_label, full_output)
            epoch_recall = recall_score(full_label, full_output)
            epoch_auc = roc_auc_score(full_label, full_output)
            if phase == 'train':
              train_acc_history.append(epoch_acc)
              train_loss_hist.append(epoch_loss)
              train_f1_score.append(epoch_f1_score)
              train_precision.append(epoch_precision)
              train_recall.append(epoch_recall)
              train_auc.append(epoch_auc)
              train_f1_score.append(epoch_f1_score)
            print('{} Loss: {:.4f} Acc: {:.4f},P : {:.4f} , R :{:.4f} , AUC : {:.4f},f1 :{:.4f},Time : {:.4f}'.format(phase, epoch_loss ,epoch_acc, epoch_precision,epoch_recall, epoch_auc, epoch_f1_score,time.time()-start))
            #,   , , epoch_precision,epoch_recall, epoch_auc, epoch_f1_score  

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
                val_loss_hist.append(epoch_loss)
                val_f1_score.append(epoch_f1_score)
                val_precision.append(epoch_precision)
                val_recall.append(epoch_recall)
                val_auc.append(epoch_auc)
                val_f1_score.append(epoch_f1_score)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    train_dict = {'Loss' : train_loss_hist , 'Accuarcy' : train_acc_history, 'Precision' : train_precision, 'Recall' : train_recall, 'Auc' : train_auc ,'f1' : train_f1_score }
    #
    val_dict = {'Loss' :val_loss_hist , 'Accuarcy' : val_acc_history,'Precision' : val_precision , 'Recall' : val_recall, 'Auc' : val_auc ,  'f1' : val_f1_score}
    #, 
    return model, train_dict ,val_dict

In [66]:
model_ft = Net(num_classes)

In [67]:
summary(model_ft.to('cuda'),(3,512,512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 512, 512]           1,792
         LeakyReLU-2         [-1, 64, 512, 512]               0
            Conv2d-3         [-1, 64, 512, 512]          36,928
         LeakyReLU-4         [-1, 64, 512, 512]               0
         MaxPool2d-5         [-1, 64, 256, 256]               0
            Conv2d-6        [-1, 128, 256, 256]          73,856
         LeakyReLU-7        [-1, 128, 256, 256]               0
            Conv2d-8        [-1, 128, 256, 256]         147,584
         LeakyReLU-9        [-1, 128, 256, 256]               0
        MaxPool2d-10        [-1, 128, 128, 128]               0
           Conv2d-11        [-1, 256, 128, 128]         295,168
        LeakyReLU-12        [-1, 256, 128, 128]               0
           Conv2d-13        [-1, 256, 128, 128]         590,080
        LeakyReLU-14        [-1, 256, 1

In [68]:
model_ft = model_ft.to(device)

# Observe that all parameters are being optimized
optimizer_ft = optim.Adam(model_ft.parameters(),lr=0.001) # 업데이트 할 파라미터만 넣어준다.

In [69]:
criterion = nn.BCEWithLogitsLoss()

# Train and eval ate
model_ft, train_dict, val_dict  = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs)

Epoch 0/9
----------
train Loss: 371576.9113 Acc: 0.6957,P : 0.7907 , R :0.7907 , AUC : 0.6060,f1 :0.7943,Time : 617.0439
val Loss: 23.6826 Acc: 0.6250,P : 0.8750 , R :0.8750 , AUC : 0.6250,f1 :0.7000,Time : 0.7611
Epoch 1/9
----------
train Loss: 36.4739 Acc: 0.7218,P : 0.8134 , R :0.8134 , AUC : 0.6353,f1 :0.8129,Time : 630.4577
val Loss: 53.8171 Acc: 0.5000,P : 0.0000 , R :0.0000 , AUC : 0.5000,f1 :0.0000,Time : 0.7557
Epoch 2/9
----------
train Loss: 3.6605 Acc: 0.7504,P : 0.8354 , R :0.8354 , AUC : 0.6701,f1 :0.8326,Time : 641.0241
val Loss: 0.9189 Acc: 0.6250,P : 0.3750 , R :0.3750 , AUC : 0.6250,f1 :0.5000,Time : 0.7494
Epoch 3/9
----------
train Loss: 31824171850.0146 Acc: 0.7168,P : 0.9154 , R :0.9154 , AUC : 0.5293,f1 :0.8277,Time : 622.2898
val Loss: 0.7418 Acc: 0.5000,P : 1.0000 , R :1.0000 , AUC : 0.5000,f1 :0.6667,Time : 0.7472
Epoch 4/9
----------
train Loss: 0.5761 Acc: 0.7429,P : 1.0000 , R :1.0000 , AUC : 0.5000,f1 :0.8525,Time : 610.7517
val Loss: 0.8039 Acc: 0.5000,

KeyboardInterrupt: ignored

In [None]:
torch.save(model_ft, '/content/drive/Shareddrives/캡스톤 디자인1/codes/vgg16_pneumonia.pt')

In [None]:
train_loss = []
val_loss = []
for i in range(len(train_dict['Loss'])):
  train_loss.append(train_dict['Loss'][i])
for i in range(len(val_dict['Loss'])):
  val_loss.append(val_dict['Loss'][i])

In [None]:
plt.plot(train_loss)
plt.plot(val_loss)
plt.title('Vgg16 Loss', fontsize = 15)
plt.xlabel('Epoch', fontsize = 15)
plt.ylabel('Loss', fontsize = 15)
plt.ylim(0,1)
plt.legend(['train','test'], loc='upper left')
plt.show()

In [None]:
train_accuracy = []
val_accuracy = []

In [None]:
val_acc[0].to('cpu').numpy()

In [None]:
train_accuracy = []
val_accuracy = []
for i in range(len(train_dict['Accuarcy'])):
  train_accuracy.append(train_dict['Accuarcy'][i].to('cpu').numpy())
for i in range(len(val_dict['Accuarcy'])):
  val_accuracy.append(val_dict['Accuarcy'][i].to('cpu').numpy())

In [None]:
plt.plot(train_accuracy)
plt.plot(val_accuracy)
plt.title('Vgg16 Accuracy', fontsize = 15)
plt.xlabel('Epoch', fontsize = 15)
plt.ylabel('Accuracy', fontsize = 15)
plt.ylim(0,1)
plt.legend(['train','test'], loc='upper left')
plt.show()

In [None]:
train_acc[0].to('cpu').numpy()

In [None]:
train_accuracy

In [None]:
model_ft = torch.load('/content/drive/Shareddrives/캡스톤 디자인1/codes/vgg16_multiclassclassfication_model')

In [None]:
class test_Dataset(Dataset):
    def __init__(self, data_path,transform = None):
        self.data_path = data_path
        self.transform = transform
    def __len__(self):
        return len(self.data_path)
    def __getitem__(self,idx):
        path = self.data_path[idx]
        img = np.array(Image.open(path))
        img = img[:,:,np.newaxis]
        img = img/255
        label =  class2idx[df[df['Image Index']== path.split('/')[-1]]['Finding Labels'].values[0]]
        if self.transform is not None:
            transformed = self.transform(image=img)
            image = transformed['image']
        return image.float(), label

In [None]:
data_transforms_test = A.Compose(
        [
     ToTensorV2()
                            ]
                           )


In [None]:
test_data = test_Dataset(val_path,transform = data_transforms_test)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle= False)

In [None]:
running_loss = 0.0
running_corrects = 0
start = time.time()
output_list = list()
for inputs, labels in test_loader:
    inputs = inputs.to(device = device, dtype = torch.float32)
    labels = labels.to(device = device)
    # zero the parameter gradients
    optimizer_ft.zero_grad()
    outputs = model_ft(inputs)
    output_list+=list((torch.argmax(outputs,axis = 1).cpu().numpy()))
    loss = criterion(outputs, labels)
    _, preds = torch.max(outputs, 1)
    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(test_loader.dataset)
    epoch_acc = running_corrects.double() / len(test_loader.dataset)
print('Test Loss: {:.4f} Acc: {:.4f}, Time : {:.4f}'.format(epoch_loss, epoch_acc, time.time()-start))

In [None]:
labels

In [None]:
torch.argmax(outputs,axis = 1).cpu().numpy()

In [None]:
outputs.shape

In [None]:
dummy_df = pd.DataFrame(output_list)

In [None]:
dummy_df.value_counts()