In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/kidneyData.csv
/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/Cyst/Cyst- (3178).jpg
/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/Cyst/Cyst- (3561).jpg
/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/Cyst/Cyst- (900).jpg
/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/Cyst/Cyst- (1148).jpg
/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/Cyst/Cyst- (268).jpg
/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-S

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
import numpy as np
import os
import cv2

# Dataset Class
class KidneyDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Data Preprocessing Function
def load_and_preprocess_images(folder_path, subfolders, image_size=(224, 224)):
    label_map = {subfolder: idx for idx, subfolder in enumerate(subfolders)}
    images = []
    labels = []
    
    for subfolder in subfolders:
        subfolder_path = os.path.join(folder_path, subfolder)
        for filename in os.listdir(subfolder_path):
            img_path = os.path.join(subfolder_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img_resized = cv2.resize(img, image_size)
                images.append(img_resized)
                labels.append(label_map[subfolder])
    
    images = np.array(images).astype('float32') / 255.0
    labels = np.array(labels)
    return images, labels

# Load and preprocess images
folder_path = "/kaggle/input/ct-kidney-dataset-normal-cyst-tumor-and-stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone"
subfolders = ['Cyst', 'Normal', 'Stone', 'Tumor']
images, labels = load_and_preprocess_images(folder_path, subfolders)

# Split into training, validation, and test sets
x_train, x_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.4, random_state=42, stratify=labels
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Define transforms for data augmentation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Create PyTorch datasets and dataloaders
train_dataset = KidneyDataset(x_train, y_train, transform=transform)
val_dataset = KidneyDataset(x_val, y_val, transform=transform)
test_dataset = KidneyDataset(x_test, y_test, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Load Pretrained VGG16 Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
base_model = models.vgg16(pretrained=True)

# Freeze all layers except the classifier
for param in base_model.features.parameters():
    param.requires_grad = False

# Modify the classifier
num_classes = 4
base_model.classifier = nn.Sequential(
    nn.Linear(25088, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, num_classes),
)
base_model = base_model.to(device)

# Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(base_model.classifier.parameters(), lr=0.0001)

# Training Loop
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=30):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        train_accuracy = 100 * correct / total
        val_accuracy, _, _, _ = evaluate_model(model, val_loader)  # Get only the accuracy value
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Train Acc: {train_accuracy:.2f}%, Val Acc: {val_accuracy:.2f}%")

# Evaluation Function
def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    all_labels = []
    all_preds = []
    all_probs = []
    
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            _, predicted = outputs.max(1)
            
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    return accuracy, np.array(all_labels), np.array(all_preds), np.array(all_probs)

# Train the model
train_model(base_model, train_loader, val_loader, criterion, optimizer, epochs=30)

# Evaluate on Test Set
test_accuracy, test_labels, test_preds, test_probs = evaluate_model(base_model, test_loader)

# Metrics Calculation
print("\nClassification Report:")
print(classification_report(test_labels, test_preds, target_names=subfolders))

# AUC for each class
auc_scores = roc_auc_score(
    np.eye(num_classes)[test_labels], test_probs, multi_class='ovr', average=None
)
for i, class_name in enumerate(subfolders):
    print(f"AUC for {class_name}: {auc_scores[i]:.2f}")

# Print overall test accuracy
print(f"\nTest Accuracy: {test_accuracy:.2f}%")




Epoch 1/30, Loss: 0.2774, Train Acc: 89.96%, Val Acc: 99.32%
Epoch 2/30, Loss: 0.0229, Train Acc: 99.30%, Val Acc: 99.32%
Epoch 3/30, Loss: 0.0222, Train Acc: 99.26%, Val Acc: 99.96%
Epoch 4/30, Loss: 0.0142, Train Acc: 99.52%, Val Acc: 99.88%
Epoch 5/30, Loss: 0.0396, Train Acc: 98.75%, Val Acc: 99.72%
Epoch 6/30, Loss: 0.0423, Train Acc: 98.81%, Val Acc: 96.58%
Epoch 7/30, Loss: 0.0276, Train Acc: 99.18%, Val Acc: 99.28%
Epoch 8/30, Loss: 0.0105, Train Acc: 99.64%, Val Acc: 99.96%
Epoch 9/30, Loss: 0.0061, Train Acc: 99.83%, Val Acc: 99.48%
Epoch 10/30, Loss: 0.0130, Train Acc: 99.60%, Val Acc: 99.68%
Epoch 11/30, Loss: 0.0091, Train Acc: 99.77%, Val Acc: 99.68%
Epoch 12/30, Loss: 0.0264, Train Acc: 99.41%, Val Acc: 99.80%
Epoch 13/30, Loss: 0.0196, Train Acc: 99.48%, Val Acc: 99.88%
Epoch 14/30, Loss: 0.0251, Train Acc: 99.40%, Val Acc: 99.88%
Epoch 15/30, Loss: 0.0317, Train Acc: 99.46%, Val Acc: 99.96%
Epoch 16/30, Loss: 0.0074, Train Acc: 99.83%, Val Acc: 98.83%
Epoch 17/30, Loss

In [3]:
torch.save(base_model.state_dict(), '/kaggle/working/kidney_model.pth')
