<a href="https://colab.research.google.com/github/zhalehk/Deep_Learning_Machine_Learning/blob/main/Untitled20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!unzip "/content/drive/MyDrive/Lung_Cancer/Dataset.zip"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Dataset/squamous_cell_carcinoma/0000.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0001.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0002.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0003.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0004.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0005.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0006.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0007.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0008.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0009.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0010.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0011.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0012.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0013.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0014.jpg  
  inflating: Dataset/squamous_cell_carcinoma/0015.jpg  
  inflating: Dataset/squamous_cell_carc

In [2]:
# 1. Installation Requirements
# NOTE: the pip command below sits inside a bare string literal, so running
# this cell installs nothing -- the cell merely evaluates to that string.
# To actually install the packages, run the command in a shell or through
# the %pip magic.
'''
pip install gdown torch torchvision numpy matplotlib scikit-learn pillow
'''


'\npip install gdown torch torchvision numpy matplotlib scikit-learn pillow\n'

In [3]:
# 2. Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score
from google.colab import drive

In [4]:
class LungCancerDataset(Dataset):
    """Image-folder dataset for three-class lung cancer classification.

    ``data_dir`` must contain one sub-directory per entry of ``self.classes``.
    Every regular file in such a directory whose name ends in ``.png``,
    ``.jpg`` or ``.jpeg`` (case-insensitive) becomes one sample; its label is
    the index of the class name in ``self.classes``.

    Args:
        data_dir: Root directory holding the class sub-directories.
        transform: Optional callable applied to each loaded PIL image in
            ``__getitem__``.

    Raises:
        FileNotFoundError: If ``data_dir`` or any class sub-directory is missing.
        Exception: If no image file is found at all.
    """

    # File-name suffixes (compared in lower case) that count as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.classes = ['adenocarcinoma', 'benign', 'squamous_cell_carcinoma']
        self.transform = transform
        self.image_paths = []
        self.labels = []

        if not os.path.exists(data_dir):
            raise FileNotFoundError(f"Data directory not found: {data_dir}")

        for idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(data_dir, class_name)
            if not os.path.exists(class_dir):
                raise FileNotFoundError(f"Class directory not found: {class_dir}")

            print(f"Loading {class_name} images...")
            # os.listdir() returns entries in arbitrary order. Sorting makes
            # the sample index -> file mapping stable, so index-based splits
            # (e.g. a seeded KFold) select the same files on every run.
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    img_path = os.path.join(class_dir, img_name)
                    if os.path.isfile(img_path):
                        self.image_paths.append(img_path)
                        self.labels.append(idx)

        if len(self.image_paths) == 0:
            raise Exception("No images found in the dataset")

        print("\nDataset Summary:")
        print(f"Total images found: {len(self.image_paths)}")
        print(f"Class distribution: {self._get_class_distribution()}")

    def _get_class_distribution(self):
        """Return a dict mapping each class name to its number of samples."""
        return {self.classes[i]: self.labels.count(i) for i in range(len(self.classes))}

    def __len__(self):
        """Return the number of image files discovered at construction time."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened with PIL and converted to RGB, then passed through
        ``self.transform`` when one was given.

        NOTE: failures are reported with ``print`` and replaced by a
        placeholder rather than raised -- a blank 224x224 RGB image when the
        file cannot be opened, a zero tensor of shape (3, 224, 224) when the
        transform fails. The placeholder keeps the sample's real label.
        """
        img_path = self.image_paths[idx]
        try:
            image = Image.open(img_path).convert('RGB')
        except Exception as e:
            print(f"Error loading image {img_path}: {str(e)}")
            image = Image.new('RGB', (224, 224))

        label = self.labels[idx]

        if self.transform:
            try:
                image = self.transform(image)
            except Exception as e:
                print(f"Error applying transform to image {img_path}: {str(e)}")
                image = torch.zeros(3, 224, 224)

        return image, label

In [5]:
class CNN(nn.Module):
    """Small two-block convolutional classifier for square RGB images.

    Layout: (3x3 conv -> ReLU -> 2x2 max-pool) twice, followed by a
    128-unit fully connected layer with ReLU and a final linear layer that
    outputs one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 3.
        input_size: Height and width, in pixels, of the (square) input images
            the network is built for. Defaults to 224, which yields the
            64 * 56 * 56 flattened features of the original architecture.

    Raises:
        ValueError: If ``input_size`` is smaller than 4, because nothing
            would be left after the two pooling stages.
    """

    def __init__(self, num_classes=3, input_size=224):
        super(CNN, self).__init__()
        if input_size < 4:
            raise ValueError(f"input_size must be at least 4, got {input_size}")

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # The padded 3x3 convolutions keep the spatial size; each 2x2/stride-2
        # pool halves it (rounding down), so two pools leave input_size // 4.
        pooled_size = input_size // 4
        self.flat_features = 64 * pooled_size * pooled_size

        self.fc1 = nn.Linear(self.flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to logits (N, num_classes)."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample instead of view(-1, features): with a wrongly
        # sized input, view(-1, ...) would silently fold the surplus features
        # into the batch dimension, whereas this makes fc1 raise a shape error.
        x = x.flatten(start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x




In [6]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy_pct)``.

    When ``optimizer`` is given, each batch is followed by a backward pass and
    an optimizer step; otherwise the pass runs with gradients disabled. The
    caller is responsible for putting the model in train/eval mode.

    The loss is the mean of the per-batch criterion values and the accuracy is
    a percentage over all samples seen. Both are NaN if the loader yields
    nothing, rather than raising ZeroDivisionError.
    """
    training = optimizer is not None
    loss_sum = 0.0
    correct = 0
    total = 0
    num_batches = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    mean_loss = loss_sum / num_batches if num_batches else float('nan')
    accuracy = 100 * correct / total if total else float('nan')
    return mean_loss, accuracy


def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=10):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: Module to optimise; assumed to already live on ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device the batches are moved to.
        epochs: Number of passes over ``train_loader``. Defaults to 10.

    Returns:
        A list with one dict per epoch holding the keys ``epoch`` (1-based),
        ``train_loss``, ``train_accuracy``, ``val_loss`` and ``val_accuracy``.
        Metrics of an empty loader are NaN.
    """
    history = []

    for epoch in range(epochs):
        model.train()
        train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )

        model.eval()
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)

        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%')
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%\n')

        history.append({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_accuracy,
            'val_loss': val_loss,
            'val_accuracy': val_accuracy,
        })

    return history


In [None]:
def main():
    """Run 5-fold cross-validation of ``CNN`` on the lung cancer image dataset.

    Loads the images under ``/content/Dataset``, resizes them to 224x224 and
    normalises them, then for each fold trains a freshly initialised ``CNN``
    for 10 epochs with Adam and cross-entropy loss, printing per-epoch
    metrics.

    Any exception is caught so the cell does not abort with an error; the
    message is printed and the full traceback is written to stderr.
    """
    import traceback  # local import: only used on the error path

    seed = 42

    try:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f'Using device: {device}')

        # Seed torch's global RNG as well as KFold, so that weight
        # initialisation and DataLoader shuffling start from a fixed state.
        torch.manual_seed(seed)

        data_dir = '/content/Dataset'

        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        dataset = LungCancerDataset(data_dir=data_dir, transform=transform)

        kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

        # KFold only needs the number of samples, so split over an explicit
        # index array rather than handing it the Dataset object itself.
        sample_indices = np.arange(len(dataset))

        for fold, (train_idx, val_idx) in enumerate(kfold.split(sample_indices)):
            print(f'Fold {fold+1}')

            train_subset = torch.utils.data.Subset(dataset, train_idx)
            val_subset = torch.utils.data.Subset(dataset, val_idx)

            train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
            val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

            # A new model and optimizer per fold, so no weights carry over
            # from one fold to the next.
            model = CNN().to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)

            train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=10)

    except Exception as e:
        print(f"An error occurred: {e}")
        # The message alone rarely shows where the failure happened.
        traceback.print_exc()

if __name__ == '__main__':
    main()

Using device: cuda
Loading adenocarcinoma images...
Loading benign images...
Loading squamous_cell_carcinoma images...

Dataset Summary:
Total images found: 15000
Class distribution: {'adenocarcinoma': 5000, 'benign': 5000, 'squamous_cell_carcinoma': 5000}
Fold 1
Epoch 1/10:
Training Loss: 0.3806, Training Accuracy: 88.06%
Validation Loss: 0.1893, Validation Accuracy: 92.63%

