Colab使用說明，請先點選左上角執行階段=>變更執行階段類型=>硬體加速器選擇GPU，接著回到頁面點選右上角的連線，接著執行下方儲存格

>醫學影像專題 
資料集來源:https://www.kaggle.com/datasets/maedemaftouni/large-covid19-ct-slice-dataset

# kaggle

> 在kaggle註冊後，點=>自己頭像=>account=>Create New API Token，按下去會下載一個kaggle.json檔案










> 把剛才的json檔案上傳



In [None]:
# Install the Kaggle command-line client (-q keeps pip's output short).
! pip install -q kaggle
# Colab helper module used below to upload a file from the local machine.
from google.colab import files
# Upload the kaggle.json API token downloaded from the Kaggle account page.
files.upload()

In [None]:
# Copy the uploaded API token into ~/.kaggle/ for the Kaggle CLI.
# -p keeps the cell re-runnable: plain mkdir errors out when the directory
# already exists.
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
! chmod 600 ~/.kaggle/kaggle.json



> 下載zip檔案



In [None]:
# Download the dataset archive (large-covid19-ct-slice-dataset.zip) with the
# Kaggle CLI; -d selects the dataset by its <owner>/<dataset> slug.
! kaggle datasets download -d maedemaftouni/large-covid19-ct-slice-dataset

Downloading large-covid19-ct-slice-dataset.zip to /content
100% 2.06G/2.06G [01:34<00:00, 23.7MB/s]
100% 2.06G/2.06G [01:34<00:00, 23.4MB/s]




> 創建資料夾input，並解壓縮檔案至input



In [None]:
# -p: do not fail when the cell is re-run and input/ already exists.
! mkdir -p input

In [None]:
# Extract the archive into input/.
# -o overwrites existing files without asking, so re-running the cell does not
# stop at unzip's interactive "replace?" question for every extracted file.
! unzip -o large-covid19-ct-slice-dataset.zip -d input

# Dataset(除了train、validation的比例可更動外，其餘部分不要更動)

---


**非常重要**，因為資料集給的是一個病患好幾張切片，所以在分資料集時"不能"直接以切片為單位分，而是要以病患為單位分


病患人數為604、464，資料集比例分為6:2:2

In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset
from pathlib import Path
from PIL import Image
import random
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms
import copy
import time
import os
import torchvision.models as models
from tqdm import tqdm
from typing import Literal
from functools import reduce
import csv

def load_patients(csv_path, data_dir_path):
    """Group the image paths listed in a metadata CSV by patient.

    Args:
        csv_path: Path of a CSV file that has "Patient ID" and "File name"
            columns.
        data_dir_path: Directory that every "File name" value is joined onto.

    Returns:
        A list with one entry per patient, ordered by the first appearance of
        the patient ID in the CSV. Each entry is the list of image paths that
        belong to that patient, in CSV row order.
    """
    paths_by_patient = {}
    with open(csv_path, encoding= 'unicode_escape') as csv_file:
        for record in csv.DictReader(csv_file):
            patient_id = record["Patient ID"]
            # setdefault creates the per-patient list the first time an ID
            # shows up and returns the existing list afterwards.
            slices = paths_by_patient.setdefault(patient_id, [])
            slices.append(os.path.join(data_dir_path, record["File name"]))

    return list(paths_by_patient.values())

def percent_list_slice(x, start=0., end=1.):
    """Return the part of `x` that lies between two fractional positions.

    `start` and `end` are fractions of len(x). Each bound is multiplied by the
    length and truncated with int() before it is used as a slice index.
    """
    length = len(x)
    first = int(length * start)
    last = int(length * end)
    return x[first:last]

class CovidCT(Dataset):
    """CT-slice dataset (non-COVID vs. COVID) with a patient-level split.

    The images are grouped by patient first and the train/valid/test split is
    taken over patients, not over individual slices, so all slices of one
    patient land in the same split. The split is positional: patients are
    taken in the order they first appear in each metadata CSV, without
    shuffling, and each class is split separately.

    Labels: 0 for images under 1NonCOVID, 1 for images under 2COVID.

    Args:
        data_root: Directory containing meta_data_normal.csv,
            meta_data_covid.csv and the curated_data/ image tree.
        mode: Which split to expose: "train", "valid" or "test".
        transform: Optional callable applied to each PIL image.

    Raises:
        ValueError: If `mode` is not one of the known split names.
    """

    # Fraction range of each class's patient list that belongs to a split.
    _SPLITS = {
        "train": (0.0, 0.6),
        "valid": (0.6, 0.8),
        "test": (0.8, 1.0),
    }

    def __init__(self,
                 data_root,
                 mode: Literal["train", "valid", "test"] = "train",
                 transform=None):
        # Fail early with a clear message; previously an unknown mode left
        # start/end unbound and surfaced later as an UnboundLocalError.
        if mode not in self._SPLITS:
            raise ValueError(
                f"mode must be one of {sorted(self._SPLITS)}, got {mode!r}")
        start, end = self._SPLITS[mode]

        normal_file_paths = self._split_file_paths(
            data_root, "meta_data_normal.csv",
            "curated_data/curated_data/1NonCOVID", start, end)
        covid_file_paths = self._split_file_paths(
            data_root, "meta_data_covid.csv",
            "curated_data/curated_data/2COVID", start, end)

        self.file_paths = normal_file_paths + covid_file_paths
        self.labels = [0]*len(normal_file_paths) + [1]*len(covid_file_paths)
        self.transform = transform

    @staticmethod
    def _split_file_paths(data_root, csv_name, image_dir, start, end):
        """Return the image paths of the patients in [start, end) of one class."""
        patients = load_patients(
            os.path.join(data_root, csv_name),
            os.path.join(data_root, image_dir))
        patients = percent_list_slice(patients, start, end)
        # Flatten patient -> slices into a single list. Unlike reduce() with
        # list concatenation this also works when the split has no patients.
        return [path for patient in patients for path in patient]

    def __len__(self):
        """Return the number of image slices in this split."""
        return len(self.file_paths)

    def __getitem__(self, index):
        """Load one slice as an RGB image and return (image, label)."""
        image = Image.open(self.file_paths[index]).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        return image, self.labels[index]

#Train


> 請注意，由於code為了方便檢視，將資料儲存在最外層(意思就是colab down之後，資料就會不見，請記得更改儲存路徑)


---


> 在這裡可以發揮您的創造力去改變模型以及任何超參數





In [None]:
# Index of the CUDA device that the model and the batches are moved to.
CUDA_DEVICES = 0
# Learning rate used at the start of training.
init_lr = 0.01

# A checkpoint is written every `checkpoint_interval` epochs.
checkpoint_interval = 5
# Make sure the checkpoint directory exists before anything is saved into it.
os.makedirs('./Checkpoint/', exist_ok=True)


# Setting learning rate operation
def adjust_lr(optimizer, epoch, base_lr=None, decay_rate=0.1, decay_every=5):
    """Set a step-decayed learning rate on every parameter group of `optimizer`.

    The learning rate is base_lr * decay_rate ** (epoch // decay_every). With
    the defaults this is one tenth of the learning rate every 5 epochs,
    starting from the module-level `init_lr`.

    Args:
        optimizer: Object with a `param_groups` list of dicts.
        epoch: Zero-based epoch index.
        base_lr: Learning rate at epoch 0. None means the module-level
            `init_lr`.
        decay_rate: Multiplicative factor applied at every decay step.
        decay_every: Number of epochs between two decay steps.
    """
    if base_lr is None:
        base_lr = init_lr
    lr = base_lr * (decay_rate ** (epoch // decay_every))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def train():
    """Fine-tune a pretrained ResNet-18 on the CovidCT training split.

    For every epoch this function
      * sets the learning rate through adjust_lr(),
      * runs one optimisation pass over the training split,
      * appends the epoch's loss / accuracy to TrainingLoss.txt /
        TrainingAccuracy.txt,
      * saves the whole model every `checkpoint_interval` epochs, and
      * prints per-class and overall accuracy on the validation split.

    After the last epoch the weights of the epoch with the highest *training*
    accuracy are restored and the whole model object is saved under
    ./Checkpoint/.
    """
    # If CUDA runs out of memory, lower batch_size in the loaders below.
    data_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    trainset = CovidCT("/content/input/", "train", data_transform)
    train_dl = DataLoader(trainset, batch_size=9, shuffle=True, num_workers=3)
    validset = CovidCT("/content/input/", "valid", data_transform)
    valid_dl = DataLoader(validset, batch_size=9, shuffle=False, num_workers=3)
    classes = ['1NonCOVID','2COVID']

    model = models.resnet18(pretrained=True)
    # A pretrained model needs its last layer replaced so that the number of
    # outputs matches the number of classes in this task.
    model.fc = nn.Linear(in_features=512, out_features=2, bias=True)
    print(model)
    print("==========")

    num_params = sum(param.nelement() for param in model.parameters())
    print("Number of parameter: %.2fM" % (num_params/1e6))
    model = model.cuda(CUDA_DEVICES)

    best_model_params = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Training epochs
    num_epochs = 5
    criterion = nn.CrossEntropyLoss()

    # Optimizer setting
    optimizer = torch.optim.SGD(params=model.parameters(), lr=init_lr, momentum=0.9)

    # Start both log files with a header line.
    with open('TrainingAccuracy.txt','w') as fAcc:
        print('Accuracy\n', file = fAcc)
    with open('TrainingLoss.txt','w') as fLoss:
        print('Loss\n', file = fLoss)

    for epoch in range(num_epochs):
        # The validation pass below switches to eval mode, so training mode
        # has to be re-enabled at the start of every epoch.
        model.train()
        localtime = time.asctime( time.localtime(time.time()) )
        header = 'Epoch: {}/{} --- < Starting Time : {} >'.format(epoch + 1,num_epochs,localtime)
        print(header)
        print('-' * len(header))

        training_loss = 0.0
        training_corrects = 0
        adjust_lr(optimizer, epoch)

        for inputs, labels in tqdm(train_dl):
            inputs = inputs.cuda(CUDA_DEVICES)
            labels = labels.cuda(CUDA_DEVICES)
            optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs.detach(), 1)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            # The criterion returns the batch mean; weight it by the batch
            # size so that the epoch average is taken per sample.
            training_loss += loss.item() * inputs.size(0)
            training_corrects += (preds == labels).sum().item()

        training_loss = training_loss / len(trainset)
        training_acc = training_corrects / len(trainset)
        print('\n Training loss: {:.4f}\taccuracy: {:.4f}\n'.format(training_loss,training_acc))

        # Keep the weights of the epoch with the best training accuracy.
        # NOTE(review): this selects on training accuracy, not on the
        # validation accuracy computed below - confirm that this is intended.
        if training_acc > best_acc:
            best_acc = training_acc
            best_model_params = copy.deepcopy(model.state_dict())

        with open('TrainingAccuracy.txt','a') as fAcc:
            print('{:.4f} '.format(training_acc), file = fAcc)
        with open('TrainingLoss.txt','a') as fLoss:
            print('{:.4f} '.format(training_loss), file = fLoss)
        if (epoch + 1) % checkpoint_interval == 0:
            torch.save(model, './Checkpoint/model-epoch-{:d}-train.pth'.format(epoch + 1))

        # ---- validation pass ----
        model.eval()
        total_correct = 0
        total = 0
        class_correct = [0.] * len(classes)
        class_total = [0.] * len(classes)

        with torch.no_grad():
            for inputs, labels in tqdm(valid_dl):
                inputs = inputs.cuda(CUDA_DEVICES)
                labels = labels.cuda(CUDA_DEVICES)
                outputs = model(inputs)

                _, predicted = torch.max(outputs, 1)
                # Deliberately left 1-D: calling squeeze() here would turn a
                # final batch of size 1 into a 0-dim tensor, and indexing it
                # per sample would raise an IndexError.
                hits = predicted == labels
                total += labels.size(0)
                total_correct += hits.sum().item()

                for label, hit in zip(labels.tolist(), hits.tolist()):
                    class_correct[label] += hit
                    class_total[label] += 1

        for class_name, n_correct, n_seen in zip(classes, class_correct, class_total):
            # A class that does not occur in the split is reported as 0 %.
            class_acc = 100 * n_correct / n_seen if n_seen else 0
            print('Accuracy of %5s : %8.4f %%' % (class_name, class_acc))

        # Accuracy over the whole validation split (0 % if it is empty).
        overall_acc = 100 * total_correct / total if total else 0
        print('\nAccuracy on the ALL val images: %.4f %%' % overall_acc)

    # Save the model with the best training accuracy ( not the best on test )
    model.load_state_dict(best_model_params)
    best_model_name = './Checkpoint/model-{:.2f}-best_train_acc.pth'.format(best_acc)
    torch.save(model, best_model_name)
    print("Best model name : " + best_model_name)


if __name__ == '__main__':
    train()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

100%|██████████| 709/709 [00:45<00:00, 15.57it/s]



 Training loss: 0.8957	accuracy: 0.6312



100%|██████████| 286/286 [00:14<00:00, 19.69it/s]


Accuracy of 1NonCOVID :  95.8264 %
Accuracy of 2COVID :  94.6667 %

Accuracy on the ALL val images: 95.7588 %
Epoch: 2/5 --- < Starting Time : Mon Mar 13 11:37:59 2023 >
-----------------------------------------------------------


100%|██████████| 709/709 [00:45<00:00, 15.63it/s]



 Training loss: 0.5905	accuracy: 0.7184



100%|██████████| 286/286 [00:14<00:00, 20.01it/s]


Accuracy of 1NonCOVID :  99.6281 %
Accuracy of 2COVID :  99.3333 %

Accuracy on the ALL val images: 99.6109 %
Epoch: 3/5 --- < Starting Time : Mon Mar 13 11:38:58 2023 >
-----------------------------------------------------------


100%|██████████| 709/709 [00:46<00:00, 15.38it/s]



 Training loss: 0.4899	accuracy: 0.7780



100%|██████████| 286/286 [00:14<00:00, 19.79it/s]


Accuracy of 1NonCOVID :  97.2314 %
Accuracy of 2COVID :  98.0000 %

Accuracy on the ALL val images: 97.2763 %
Epoch: 4/5 --- < Starting Time : Mon Mar 13 11:39:59 2023 >
-----------------------------------------------------------


100%|██████████| 709/709 [00:45<00:00, 15.67it/s]



 Training loss: 0.3938	accuracy: 0.8194



100%|██████████| 286/286 [00:14<00:00, 19.81it/s]


Accuracy of 1NonCOVID :  99.6281 %
Accuracy of 2COVID :  93.3333 %

Accuracy on the ALL val images: 99.2607 %
Epoch: 5/5 --- < Starting Time : Mon Mar 13 11:40:59 2023 >
-----------------------------------------------------------


100%|██████████| 709/709 [00:45<00:00, 15.41it/s]



 Training loss: 0.3807	accuracy: 0.8313



100%|██████████| 286/286 [00:14<00:00, 19.81it/s]

Accuracy of 1NonCOVID :  96.2397 %
Accuracy of 2COVID : 100.0000 %

Accuracy on the ALL val images: 96.4591 %
Best model name : ./Checkpoint/model-0.83-best_train_acc.pth





# Test


> 可以在此增加其他常見的評估指標(F1、AUC...)



In [None]:
import torch
from torch.autograd import Variable
from torchvision import transforms
from pathlib import Path
from PIL import Image
from torch.utils.data import DataLoader
import numpy as np

# Path of the saved model to evaluate; replace it with your own model name.
PATH_TO_WEIGHTS = '/content/Checkpoint/model-0.83-best_train_acc.pth'
# Index of the CUDA device that the model and the batches are moved to.
CUDA_DEVICES = 0


def test():
    """Evaluate the model stored at PATH_TO_WEIGHTS on the CovidCT test split.

    Prints the accuracy of every class followed by the accuracy over all test
    images. A class without any test sample is reported as 0 %.
    """
    data_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    testset = CovidCT("/content/input/", "test", data_transform)
    test_dl = DataLoader(testset, batch_size=7, shuffle=False, pin_memory=True, num_workers=3)
    classes = ['1NonCOVID','2COVID']

    # Load model
    # NOTE(review): the file holds a whole pickled model object, and
    # unpickling can execute arbitrary code - only load checkpoints that you
    # created yourself.
    model = torch.load(PATH_TO_WEIGHTS)
    model = model.cuda(CUDA_DEVICES)
    model.eval()

    total_correct = 0
    total = 0
    class_correct = [0.] * len(classes)
    class_total = [0.] * len(classes)

    with torch.no_grad():
        for inputs, labels in tqdm(test_dl):
            inputs = inputs.cuda(CUDA_DEVICES)
            labels = labels.cuda(CUDA_DEVICES)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            # Deliberately left 1-D: calling squeeze() here would turn a final
            # batch of size 1 into a 0-dim tensor, and indexing it per sample
            # would raise an IndexError.
            hits = predicted == labels

            # Totals over the whole split
            total += labels.size(0)
            total_correct += hits.sum().item()

            # Per-class counts, one sample at a time
            for label, hit in zip(labels.tolist(), hits.tolist()):
                class_correct[label] += hit
                class_total[label] += 1

    for class_name, n_correct, n_seen in zip(classes, class_correct, class_total):
        # Guard against a class that does not occur in the test split.
        class_acc = 100 * n_correct / n_seen if n_seen else 0
        print('Accuracy of %5s : %8.4f %%' % (class_name, class_acc))

    # Accuracy over the whole test split (0 % if it is empty).
    overall_acc = 100 * total_correct / total if total else 0
    print('\nAccuracy on the ALL test images: %.4f %%' % overall_acc)


if __name__ == '__main__':
    test()


100%|██████████| 792/792 [00:33<00:00, 23.78it/s]

Accuracy of 1NonCOVID :  95.7831 %
Accuracy of 2COVID :  67.2591 %

Accuracy on the ALL test images: 75.8030 %



