In [2]:
import os
import random
import shutil

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# 1. Get data

In [3]:

dataset_dir = "covid-dataset"
class_names = ['COVID', 'Normal', 'Viral Pneumonia']
train_dir = os.path.join(dataset_dir, 'train')
test_dir = os.path.join(dataset_dir, "test")

# Make directories with each class name
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
for class_name in class_names:
    os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

# Train and test ratio
split_ratio = 0.8
covid_train_split = int(len(os.listdir("covid-dataset/COVID/images/")) * split_ratio)
normal_train_split = int(len(os.listdir("covid-dataset/Normal/images/")) * split_ratio)
pneumonia_train_split = int(len(os.listdir("covid-dataset/Viral Pneumonia/images/")) * split_ratio)

# Shuffle directories
for class_name in class_names:
    random.shuffle(os.listdir(dataset_dir + "/" + class_name + "/images"))

# Move images into train and test
covid_src_images = os.listdir("covid-dataset/COVID/images/")
train_covid_images = covid_src_images[:covid_train_split]
test_covid_images = covid_src_images[covid_train_split:]
for image in train_covid_images:
    image_path = os.path.join("covid-dataset/COVID/images/", image)
    shutil.move(image_path, "covid-dataset/train/COVID")
for image in test_covid_images:
    image_path = os.path.join("covid-dataset/COVID/images/", image)
    shutil.move(image_path, "covid-dataset/test/COVID")

normal_src_images = os.listdir("covid-dataset/Normal/images/")
train_normal_images = normal_src_images[:normal_train_split]
test_normal_images = normal_src_images[normal_train_split:]
for image in train_normal_images:
    image_path = os.path.join("covid-dataset/Normal/images/", image)
    shutil.move(image_path, "covid-dataset/train/Normal")
for image in test_normal_images:
    image_path = os.path.join("covid-dataset/Normal/images/", image)
    shutil.move(image_path, "covid-dataset/test/Normal")

pneumonia_src_images = os.listdir("covid-dataset/Viral Pneumonia/images/")
train_pneumonia_images = pneumonia_src_images[:pneumonia_train_split]
test_pneumonia_images = pneumonia_src_images[pneumonia_train_split:]
for image in train_pneumonia_images:
    image_path = os.path.join("covid-dataset/Viral Pneumonia/images/", image)
    shutil.move(image_path, "covid-dataset/train/Viral Pneumonia")
for image in test_pneumonia_images:
    image_path = os.path.join("covid-dataset/Viral Pneumonia/images/", image)
    shutil.move(image_path, "covid-dataset/test/Viral Pneumonia")

print("Files moved successfully!")



Files moved successfully!


# 2. Dataset and DataLoader

In [4]:
# HYPERPARAMETERS
BATCH_SIZE=32
NUM_WORKERS=os.cpu_count()

In [5]:
data_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.ToTensor()
])

train_data = datasets.ImageFolder(root=train_dir, transform=data_transforms)
test_data = datasets.ImageFolder(root=test_dir, transform=data_transforms)
class_names = train_data.classes
print(class_names)
print(f"Train data: {train_data} \n Test data: {test_data}")

['COVID', 'Normal', 'Viral Pneumonia']
Train data: Dataset ImageFolder
    Number of datapoints: 12121
    Root location: covid-dataset\train
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           ) 
 Test data: Dataset ImageFolder
    Number of datapoints: 3032
    Root location: covid-dataset\test
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=warn)
               ToTensor()
           )


In [6]:
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
test_dataloader = DataLoader(dataset=test_data,batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

# 2.1 Turn it into a script

In [7]:
os.makedirs("scripts", exist_ok=True)

In [8]:
%%writefile scripts/data_setup.py
"""
Putting data into Imagefolder and Dataloader
"""

import os
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

BATCH_SIZE=32
NUM_WORKERS=os.cpu_count()

def create_dataloaders(train_dir, test_dir, transform, batch_size, num_workers):
    train_data = datasets.ImageFolder(root=train_dir, transform=transform)
    test_data = datasets.ImageFolder(root=test_dir, transform=transform)

    class_name = train_data.classes

    train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
    test_dataloader = DataLoader(dataset=test_data,batch_size=batch_size, num_workers=num_workers)

    return train_dataloader, test_dataloader, class_names

Overwriting scripts/data_setup.py


# 3. Model (CovidAid)

In [149]:

class CovidAidModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.covid_aid_1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(8),
            nn.LeakyReLU()
        )
        self.covid_aid_2 = nn.Sequential(
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.LeakyReLU()
        )
        self.covid_aid_3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU()
        )
        self.covid_aid_4 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU()
        )
        self.covid_aid_5 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU()
        )
        self.covid_aid_6 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU()
        )
        self.covid_aid_7 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=3, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(3),
            nn.LeakyReLU()
        )
        
        self.covid_aid_block_1 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
        )
        self.covid_aid_block_2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
        )
        self.covid_aid_block_3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),
        )

        self.covid_aid_block_4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
        )

        self.maxpool_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.maxpool_2 = nn.MaxPool2d(kernel_size=1, stride=1)
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(363, 3)

    def forward(self, x):
        x = self.covid_aid_1(x)
        x = self.maxpool_1(x)
        x = self.covid_aid_2(x)
        x = self.maxpool_1(x)
        x = self.covid_aid_block_1(x)
        x = self.maxpool_1(x)
        x = self.covid_aid_block_2(x)
        x = self.maxpool_1(x)
        x = self.covid_aid_block_3(x)
        x = self.maxpool_1(x)
        x = self.covid_aid_block_4(x)
        x = self.maxpool_1(x)
        x = self.covid_aid_3(x)
        x = self.covid_aid_4(x)
        x = self.covid_aid_5(x)
        x = self.covid_aid_6(x)
        x = self.covid_aid_7(x)
        x = self.flatten(x)
        x = self.linear(x)
        return x
    

In [150]:
test_img = torch.rand(32,3,256,256)
model = CovidAidModel()
model(test_img)

torch.Size([32, 8, 256, 256])
torch.Size([32, 16, 128, 128])
torch.Size([32, 32, 66, 66])
torch.Size([32, 64, 35, 35])
torch.Size([32, 128, 19, 19])
torch.Size([32, 256, 11, 11])
torch.Size([32, 512, 7, 7])
torch.Size([32, 256, 9, 9])
torch.Size([32, 128, 11, 11])
torch.Size([32, 256, 11, 11])
torch.Size([32, 3, 11, 11])
torch.Size([32, 363])


tensor([[ 0.4448, -0.1802,  0.3612],
        [ 0.2820, -0.1401,  0.1586],
        [ 0.4209, -0.6287, -0.2507],
        [ 0.1778, -0.0448, -0.5364],
        [ 0.0831, -0.1157, -0.0334],
        [ 0.2719, -0.0613, -0.3318],
        [ 0.4028, -0.1875, -0.3016],
        [-0.0590,  0.1174, -0.3959],
        [ 0.4976, -0.1635, -0.5833],
        [ 0.3841, -0.3567,  0.2542],
        [-0.2299,  0.0499, -0.4045],
        [-0.2499, -0.3341,  0.2130],
        [ 0.4143, -0.4366, -0.0422],
        [ 0.0070,  0.1843, -0.2757],
        [ 0.2843, -0.2930,  0.4217],
        [ 0.2519, -0.5915, -0.0353],
        [ 0.0359, -0.0201, -0.3710],
        [-0.0137, -0.0333,  0.6405],
        [ 0.0747, -0.2403, -0.0021],
        [-0.1661, -0.4058, -0.1379],
        [ 0.5806, -0.1805, -0.1671],
        [-0.1874,  0.1722,  0.0017],
        [ 0.2193, -0.0506, -0.5232],
        [-0.1687, -0.1926,  0.1634],
        [-0.0058, -0.2491, -0.1953],
        [ 0.5129,  0.0940,  0.0367],
        [ 0.0690, -0.1021, -0.2258],
 

In [30]:
def conv_output_size(input_size, filter_size, stride, padding):
    print((((input_size - filter_size + 2*padding)/stride)+1))

conv_output_size(256, 3,1,1)

256.0


In [32]:
test_img = torch.rand(3,256,256)
conv = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
batch_norm = nn.BatchNorm2d()
a = conv(test_img)
print(a.shape)
a = batch_norm(a)
print(a.shape)

torch.Size([8, 256, 256])


ValueError: expected 4D input (got 3D input)