In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torchvision.io import read_image, ImageReadMode

from zipfile import ZipFile
import os
import glob
from tqdm import tqdm

In [9]:
# record environment details for this session so results can be reproduced
# NOTE(review): assumes session_info.show() reports package/interpreter versions — confirm against the session_info docs
import session_info
session_info.show()

In [19]:
# choose the compute device: CUDA when torch can see a GPU, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# unpack every member of the image archive into a dedicated directory
ARCHIVE_PATH = './archive.zip'
TARGET_PATH = './dataset'
with ZipFile(ARCHIVE_PATH, mode='r') as zipped_images:
    zipped_images.extractall(TARGET_PATH)

In [3]:
# creating custom dataset class
class BrainTumorDataset(Dataset):
    """Dataset over ``.jpg`` images stored in one sub-folder per class.

    Every ``.jpg`` file found recursively under ``img_dir`` is one sample. The
    name of the directory that directly contains a file is used as its label.

    Args:
        img_dir: Root directory searched recursively for ``.jpg`` files.
        transform: Optional callable applied to the decoded image.
        target_transform: Optional callable applied to the label string.
    """

    def __init__(self, img_dir, transform=None, target_transform=None):
        self.img_dir = img_dir
        # build the pattern with os.path.join so it works with any path separator;
        # sort so that index -> file is the same on every run and every platform
        pattern = os.path.join(self.img_dir, '**', '*.jpg')
        self.files = sorted(glob.iglob(pattern, recursive=True))
        self.data_length = len(self.files)  # cached sample count
        self.transform = transform
        self.target_transform = target_transform

    @staticmethod
    def _label_from_path(filepath):
        """Return the name of the directory that directly contains ``filepath``."""
        # os.path understands the platform's separator, unlike split('/')
        return os.path.basename(os.path.dirname(filepath))

    def __len__(self):
        """Return the number of image files discovered at construction time."""
        return self.data_length

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as an ``(image, label)`` pair.

        ``transform`` / ``target_transform`` are applied when they were given.
        """
        filepath = self.files[idx]
        image = read_image(filepath, mode=ImageReadMode.RGB)  # convert any non-RGB pictures to 3 channels
        label = self._label_from_path(filepath)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
        

In [12]:
# image-side preprocessing used by the datasets: resize every image to 224x224
input_transformations = transforms.Compose(
    [transforms.Resize(size=(224, 224), antialias=True)]
)

# class-folder name -> integer class index
label_to_i = dict(notumor=0, glioma=1, meningioma=2, pituitary=3)


def output_transformation(x):
    """Translate a class name into its integer index (KeyError if unknown)."""
    return label_to_i[x]

In [74]:
# hyperparameters
num_classes = 4         # size of the model's output layer
epochs = 30             # number of passes over the training loader
batch_size = 128        # samples per batch for both loaders
learning_rate = 1e-3    # learning rate handed to the optimizer

In [75]:
# create dataloaders
TRAIN_DIR = './dataset/Training'
TEST_DIR = './dataset/Testing'
# creating training split (reshuffled on every pass so batches differ between epochs)
train_data = BrainTumorDataset(img_dir = TRAIN_DIR,
                               transform = input_transformations, 
                               target_transform = output_transformation)
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# creating testing split (evaluation does not depend on sample order, so no shuffling)
test_data = BrainTumorDataset(img_dir = TEST_DIR,
                              transform = input_transformations,
                              target_transform = output_transformation)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [76]:
# create model
model = models.resnet18(weights=None)  # no pretrained weights are loaded
# swap the classification head for one with `num_classes` outputs; take the
# input width from the layer being replaced instead of hard-coding it
model.fc = nn.Linear(model.fc.in_features, num_classes)

# move the model to the selected device (GPU when available, otherwise CPU)
model = model.to(device)

In [77]:
# loss function and optimizer used by the training loop
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [78]:
# training loop
for epoch in range(epochs):
    print(f'Epoch: [{epoch+1}/{epochs}]')
    # put the model in training mode explicitly: this cell must not depend on
    # whichever mode a previously executed cell left the model in
    model.train()
    running_loss = 0  # sum of per-sample losses over the epoch
    for data, targets in tqdm(train_dataloader):
        data, targets = data.to(device).float(), targets.to(device).long() # move tensors to the selected device

        # forward pass
        scores = model(data)
        loss = criterion(scores, targets)
        # weight the batch loss by the batch size so every sample counts equally
        running_loss += loss.item()*data.shape[0]

        # backward pass
        optimizer.zero_grad()
        loss.backward()

        # optimizer step
        optimizer.step()
    print(f'Loss: {running_loss}')
    

Epoch: [1/30]


100%|██████████| 45/45 [00:21<00:00,  2.09it/s]


Loss: 3520.6408014297485
Epoch: [2/30]


100%|██████████| 45/45 [00:21<00:00,  2.11it/s]


Loss: 2560.1507291793823
Epoch: [3/30]


100%|██████████| 45/45 [00:21<00:00,  2.07it/s]


Loss: 1706.8593578338623
Epoch: [4/30]


100%|██████████| 45/45 [00:22<00:00,  2.04it/s]


Loss: 1372.6516642570496
Epoch: [5/30]


100%|██████████| 45/45 [00:22<00:00,  1.99it/s]


Loss: 1190.4890949726105
Epoch: [6/30]


100%|██████████| 45/45 [00:22<00:00,  1.97it/s]


Loss: 869.58806848526
Epoch: [7/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 687.2053520679474
Epoch: [8/30]


100%|██████████| 45/45 [00:22<00:00,  2.00it/s]


Loss: 416.70775079727173
Epoch: [9/30]


100%|██████████| 45/45 [00:22<00:00,  1.99it/s]


Loss: 497.0117120742798
Epoch: [10/30]


100%|██████████| 45/45 [00:22<00:00,  1.99it/s]


Loss: 378.64828407764435
Epoch: [11/30]


100%|██████████| 45/45 [00:22<00:00,  1.99it/s]


Loss: 207.9038160443306
Epoch: [12/30]


100%|██████████| 45/45 [00:22<00:00,  1.99it/s]


Loss: 134.46608167886734
Epoch: [13/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 148.0156723856926
Epoch: [14/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 211.85342171788216
Epoch: [15/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 148.89386492967606
Epoch: [16/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 109.11355365812778
Epoch: [17/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 60.28053462691605
Epoch: [18/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 84.49045938253403
Epoch: [19/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 171.00672687590122
Epoch: [20/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 203.2018091082573
Epoch: [21/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 99.3575929403305
Epoch: [22/30]


100%|██████████| 45/45 [00:22<00:00,  1.98it/s]


Loss: 66.90299233049154
Epoch: [23/30]


100%|██████████| 45/45 [00:22<00:00,  1.96it/s]


Loss: 49.47613747417927
Epoch: [24/30]


100%|██████████| 45/45 [00:23<00:00,  1.94it/s]


Loss: 30.84241706877947
Epoch: [25/30]


100%|██████████| 45/45 [00:23<00:00,  1.95it/s]


Loss: 6.864888174459338
Epoch: [26/30]


100%|██████████| 45/45 [00:23<00:00,  1.96it/s]


Loss: 3.9304363464470953
Epoch: [27/30]


100%|██████████| 45/45 [00:23<00:00,  1.96it/s]


Loss: 2.0000771281775087
Epoch: [28/30]


100%|██████████| 45/45 [00:23<00:00,  1.95it/s]


Loss: 1.4598265249514952
Epoch: [29/30]


100%|██████████| 45/45 [00:23<00:00,  1.96it/s]


Loss: 1.103943954454735
Epoch: [30/30]


100%|██████████| 45/45 [00:22<00:00,  1.97it/s]

Loss: 0.729454645421356





In [82]:
# check accuracy
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the model classifies correctly.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: ``nn.Module`` that maps an input batch to per-class scores with
            the classes along dimension 1.

    The model is put in eval mode while predicting and is returned to the mode
    it was in before the call, even if an error occurs. Batches are moved to
    the device of the model's parameters (CPU if it has none). An empty loader
    reports an accuracy of ``nan`` instead of raising ``ZeroDivisionError``.
    """
    num_correct = 0
    num_samples = 0

    # run on whatever device the model lives on rather than relying on a global
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device).float()
                y = y.to(device=target_device).long()

                scores = model(x)
                predictions = scores.argmax(dim=1)  # index of the highest-scoring class
                # .item() keeps the running count a plain Python int
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # restore the caller's mode instead of unconditionally forcing train mode
        model.train(was_training)

    accuracy = num_correct / num_samples * 100 if num_samples else float('nan')
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')

In [81]:
# report accuracy for the training split first, then for the test split
for heading, split_loader in (('Train Accuracy:', train_dataloader),
                              ('Test Accuracy:', test_dataloader)):
    print(heading)
    check_accuracy(split_loader, model)

Train Accuracy:
Got 5712 / 5712 with accuracy 100.00
Test Accuracy:
Got 1290 / 1311 with accuracy 98.40
