# Use a T4 GPU runtime to complete training faster

In [None]:
!pip install torch torchvision pillow tqdm



In [None]:
!pip install kaggle



In [None]:
# Download the dataset archive with the Kaggle CLI; the zip file is written to
# the current working directory.
# NOTE(review): the CLI may require Kaggle API credentials to be configured
# beforehand — confirm for the target environment.
!kaggle datasets download -d rm1000/lung-cancer-histopathological-images

Dataset URL: https://www.kaggle.com/datasets/rm1000/lung-cancer-histopathological-images
License(s): CC-BY-SA-4.0
Downloading lung-cancer-histopathological-images.zip to /content
100% 1.55G/1.55G [00:16<00:00, 68.6MB/s]
100% 1.55G/1.55G [00:16<00:00, 98.6MB/s]


In [None]:
import zipfile
import os

# Downloaded archive and the directory that receives its contents
zip_file_path = "lung-cancer-histopathological-images.zip"
destination_folder = "/content/dataset"

# Make sure the target directory is present before extracting into it
os.makedirs(destination_folder, exist_ok=True)

# Extract every member of the archive; the handle is closed even on failure
archive = zipfile.ZipFile(zip_file_path, 'r')
try:
    archive.extractall(path=destination_folder)
finally:
    archive.close()

print(f"Unzipped files to {destination_folder}")


Unzipped files to /content/dataset


In [None]:
# Quick sanity check: list the contents of the current working directory.
!ls

lung-cancer-histopathological-images.zip  sample_data


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from PIL import Image
import os
from tqdm import tqdm

In [None]:
class LungCancerDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``root_dir`` per class.

    Args:
        root_dir: Directory whose sub-directories each hold the images of one class.
        transform: Optional callable applied to every loaded RGB ``PIL.Image``.
        classes: Optional explicit sequence of class directory names. The position
            of a name in this sequence is its integer label. When omitted, the
            class directories are discovered with ``os.listdir``; that order is
            filesystem-dependent, so pass ``classes`` whenever the label mapping
            must match a previously trained checkpoint.

    Attributes:
        classes: List of class names; the list index is the label.
        file_list: List of ``(image_path, label)`` tuples.

    Raises:
        FileNotFoundError: If ``root_dir`` or a directory named in ``classes``
            does not exist.
    """

    def __init__(self, root_dir, transform=None, classes=None):
        self.root_dir = root_dir
        self.transform = transform
        if classes is None:
            # Only real, non-hidden directories are classes: a stray file used to
            # crash the scan below, and notebook artefacts such as
            # `.ipynb_checkpoints` would otherwise become a phantom class.
            classes = [
                entry for entry in os.listdir(root_dir)
                if not entry.startswith('.')
                and os.path.isdir(os.path.join(root_dir, entry))
            ]
        self.classes = list(classes)
        self.file_list = []
        # enumerate() yields the label directly instead of an O(n) list.index()
        # lookup for every single file.
        for label, class_name in enumerate(self.classes):
            class_path = os.path.join(root_dir, class_name)
            # Sorted so that sample indices are stable across filesystems.
            for file_name in sorted(os.listdir(class_path)):
                file_path = os.path.join(class_path, file_name)
                if os.path.isfile(file_path):
                    self.file_list.append((file_path, label))

    def __len__(self):
        """Return the number of images found under ``root_dir``."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, if a transform was given, passed
        through it before being returned.
        """
        img_path, label = self.file_list[idx]
        # The context manager closes the underlying file handle promptly;
        # convert() returns an independent, fully loaded image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
# Per-channel statistics used to normalise the RGB tensors
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: fixed 224x224 size, tensor conversion,
# then channel-wise normalisation.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)


In [None]:
full_dataset = LungCancerDataset(root_dir='dataset', transform=transform)

# Split the dataset into train and test.
# Only one third of the images is used for training; the remaining two thirds
# become the test split.
total_size = len(full_dataset)
train_size = total_size // 3    # reduced dataset size to prevent using all ram available during training
test_size = total_size - train_size

print(train_size)

# Seed the split so that train/test membership is the same on every run;
# an unseeded random_split yields a different partition each time the cell runs.
# NOTE(review): a checkpoint trained on a different partition will have seen
# part of this test split during training — confirm before reporting its accuracy.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    full_dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

5000


## Test the saved model with the test dataset

In [None]:
!pip install gdown



In [None]:
!gdown --id 1HzGKS9unJ8fX9hhLWfVpApkSDZGsu_eO

Downloading...
From (original): https://drive.google.com/uc?id=1HzGKS9unJ8fX9hhLWfVpApkSDZGsu_eO
From (redirected): https://drive.google.com/uc?id=1HzGKS9unJ8fX9hhLWfVpApkSDZGsu_eO&confirm=t&uuid=fe2a1b90-22f1-4f68-8dbd-543aed546d93
To: /content/lung_cancer_model_version1.0.pth
100% 94.4M/94.4M [00:00<00:00, 139MB/s]


In [None]:
# Evaluate the downloaded checkpoint on `test_loader`: overall accuracy plus
# accuracy per class.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 3

# Build a ResNet-50 with a 3-way head, then load the saved weights into it.
# `weights=None` replaces the deprecated `pretrained=False`.
model = models.resnet50(weights=None)
model.fc = nn.Linear(model.fc.in_features, num_classes)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# downloaded file cannot run arbitrary code while being loaded.
state_dict = torch.load(
    'lung_cancer_model_version1.0.pth', map_location=device, weights_only=True
)
model.load_state_dict(state_dict)
model.to(device)
model.eval()


correct = 0
total = 0
class_correct = [0.0] * num_classes
class_total = [0.0] * num_classes

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc='Testing'):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        hits = predicted == labels
        total += labels.size(0)
        correct += hits.sum().item()

        # Compute accuracy for each class.
        # tolist() copies the batch to the host once (instead of one .item()
        # sync per sample) and, unlike squeeze(), also works for a batch of 1.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

# Print overall accuracy (NaN when the loader yielded no samples)
accuracy = 100 * correct / total if total else float('nan')
print(f'Overall Test Accuracy: {accuracy:.2f}%')

# Print accuracy for each class; a class with no test samples has no accuracy
for i in range(num_classes):
    if class_total[i] == 0:
        print(f'Accuracy of class {i}: n/a (no samples)')
        continue
    class_accuracy = 100 * class_correct[i] / class_total[i]
    print(f'Accuracy of class {i}: {class_accuracy:.2f}%')


  model.load_state_dict(torch.load('lung_cancer_model_version1.0.pth', map_location=device))
Testing: 100%|██████████| 313/313 [01:51<00:00,  2.81it/s]

Overall Test Accuracy: 98.20%
Accuracy of class 0: 97.66%
Accuracy of class 1: 96.96%
Accuracy of class 2: 99.97%





In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

# Predictions and ground-truth labels gathered over the whole test loader
all_preds, all_labels = [], []

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)
        batch_scores = model(batch_inputs)
        # Index of the highest score per row is the predicted class
        batch_preds = batch_scores.argmax(dim=1)
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

# Rows correspond to true labels, columns to predicted labels
cm = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[3261   77    1]
 [ 101 3218    0]
 [   1    0 3341]]


## Training the model

In [None]:
# Start from ImageNet-pretrained weights and swap the final layer for a 3-class head.
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# weight set that the old flag selected.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
num_classes = 3
model.fc = nn.Linear(model.fc.in_features, num_classes)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 170MB/s]


In [None]:
# Adam over every parameter of the network (backbone and classification head)
learning_rate = 0.001
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Cross-entropy on the raw class scores
criterion = nn.CrossEntropyLoss()

In [None]:
# Train for `num_epochs` epochs, save the weights, then report test accuracy.
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    samples_seen = 0

    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')

    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is the mean over the batch; weight it by the batch size so the
        # (smaller) last batch does not count as much as a full one.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

        pbar.set_postfix({'loss': f'{loss.item():.4f}'})

    # Mean loss per sample over the epoch (NaN if the loader was empty)
    epoch_loss = running_loss / samples_seen if samples_seen else float('nan')
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

# Save the model right after training so that a failure during the test pass
# cannot lose the trained weights.
torch.save(model.state_dict(), 'lung_cancer_model.pth')

# Testing loop
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in tqdm(test_loader, desc='Testing'):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # Under no_grad the legacy `.data` detour is unnecessary
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total if total else float('nan')
print(f'Test Accuracy: {accuracy:.2f}%')

Epoch 1/10: 100%|██████████| 157/157 [01:29<00:00,  1.75batch/s, loss=0.3950]


Epoch [1/10], Loss: 0.2402


Epoch 2/10: 100%|██████████| 157/157 [01:28<00:00,  1.78batch/s, loss=1.0953]


Epoch [2/10], Loss: 0.1501


Epoch 3/10: 100%|██████████| 157/157 [01:28<00:00,  1.77batch/s, loss=0.0095]


Epoch [3/10], Loss: 0.1338


Epoch 4/10: 100%|██████████| 157/157 [01:28<00:00,  1.78batch/s, loss=0.5487]


Epoch [4/10], Loss: 0.0845


Epoch 5/10: 100%|██████████| 157/157 [01:28<00:00,  1.78batch/s, loss=0.2706]


Epoch [5/10], Loss: 0.0748


Epoch 6/10: 100%|██████████| 157/157 [01:27<00:00,  1.79batch/s, loss=0.0002]


Epoch [6/10], Loss: 0.0884


Epoch 7/10: 100%|██████████| 157/157 [01:28<00:00,  1.77batch/s, loss=0.1248]


Epoch [7/10], Loss: 0.0410


Epoch 8/10: 100%|██████████| 157/157 [01:28<00:00,  1.78batch/s, loss=0.0655]


Epoch [8/10], Loss: 0.0478


Epoch 9/10: 100%|██████████| 157/157 [01:28<00:00,  1.77batch/s, loss=0.3431]


Epoch [9/10], Loss: 0.0586


Epoch 10/10: 100%|██████████| 157/157 [01:28<00:00,  1.78batch/s, loss=0.0043]


Epoch [10/10], Loss: 0.0346


Testing: 100%|██████████| 313/313 [01:50<00:00,  2.84it/s]


Test Accuracy: 98.20%
