<a href="https://colab.research.google.com/github/vaibhavbajpai79/AIML/blob/main/CV_N.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

# Step 1: Download and Unzip the Dataset (From the provided notebook cell)
#   This part downloads and extracts the data.  It should be placed at the VERY beginning
#   of your notebook to ensure the data is available.

!pip install kaggle
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d darren2020/ct-to-mri-cgan
!unzip ct-to-mri-cgan.zip -d data/

# Step 2: Define Dataset Paths (Aligned with the data structure)
CT_DIR = "data/Dataset/images/trainA/"
MRI_DIR = "data/Dataset/images/trainB/" # This path is defined, but MRI data is not used in OCSVM

# Step 3: Define CTMRIDataset and DataLoader (Using PyTorch)
class CTMRIDataset(Dataset):
    """Dataset of grayscale CT images read from the ``.png`` files of a directory.

    Only CT images are loaded; MRI data is not needed for the One-Class SVM.

    Args:
        ct_dir: Directory whose ``.png`` files are used as CT images.
        transform: Optional callable applied to each PIL image before it is
            returned.
    """

    def __init__(self, ct_dir, transform=None):
        # Sorted so the index -> file mapping does not depend on the order in
        # which os.listdir happens to return the entries.
        self.ct_images = sorted(
            os.path.join(ct_dir, f) for f in os.listdir(ct_dir) if f.endswith('.png')
        )
        self.transform = transform
        print(f"Number of CT images: {len(self.ct_images)}")

    def __len__(self):
        """Return the number of CT images found in the directory."""
        return len(self.ct_images)

    def __getitem__(self, idx):
        """Load the CT image at ``idx`` and return it (transformed if requested).

        Raises:
            OSError: If the image file cannot be read or decoded.
        """
        path = self.ct_images[idx]
        ct_img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if ct_img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside PIL.
            raise OSError(f"Could not read CT image: {path}")
        ct_img = Image.fromarray(ct_img)

        if self.transform is not None:
            ct_img = self.transform(ct_img)

        return ct_img

# Preprocessing applied to every CT image: resize to 224x224, convert the PIL
# image to a tensor, then normalize the single channel with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),  # maps [0, 1] to [-1, 1]
    ]
)

# Build the CT dataset and the loader that feeds feature extraction.
dataset = CTMRIDataset(ct_dir=CT_DIR, transform=transform)
train_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=16)

# Step 4: Feature Extraction using CNN (Using PyTorch)
class SimpleCNN(nn.Module):
    """Small two-stage convolutional network used as a feature extractor.

    Takes single-channel images and produces a 128-dimensional, non-negative
    feature vector per image. The first fully connected layer is sized for
    224x224 inputs (two 2x2 poolings leave a 64 x 56 x 56 volume).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels, spatial size halved by pooling.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 32 -> 64 channels, spatial size halved again.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Head: flatten and project onto the 128-dimensional feature space.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=64 * 56 * 56, out_features=128)
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through every layer in order and return the features."""
        pipeline = (
            self.conv1, self.relu1, self.pool1,
            self.conv2, self.relu2, self.pool2,
            self.flatten, self.fc1, self.relu3,
        )
        for layer in pipeline:
            x = layer(x)
        return x

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Create the CNN feature extractor and move its parameters to that device.
cnn = SimpleCNN()
cnn = cnn.to(device)

def extract_features(dataloader, model, device):
    """Run ``model`` over every batch of ``dataloader`` and collect the outputs.

    The model is switched to evaluation mode and gradients are disabled while
    the batches are processed.

    Args:
        dataloader: Iterable yielding input tensors, one batch at a time.
        model: Module applied to each batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        A NumPy array with the per-batch outputs concatenated along axis 0.
    """
    model.eval()
    with torch.no_grad():
        # Each output is brought back to the CPU so it can become a NumPy array.
        per_batch = [model(inputs.to(device)).cpu().numpy() for inputs in dataloader]
    return np.concatenate(per_batch, axis=0)

# CNN features of the training (normal) CT images
train_features = extract_features(dataloader=train_loader, model=cnn, device=device)

# Step 5: Standardize the features; the fitted scaler is reused on test data
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(X=train_features)

# Step 6: Fit the One-Class SVM on the scaled normal features
ocsvm = OneClassSVM(nu=0.01, kernel='rbf')  # nu is a tunable hyperparameter
ocsvm.fit(X=train_features_scaled)

# Step 7: Load Anomaly Data and Extract Features for Testing
# In practice, you would load anomaly data (e.g., from another directory)
# and extract features from it using the SAME CNN and scaling.
# For demonstration, let's create some dummy anomaly data by adding noise.
# This is just for example: replace this with real anomaly data!

# Step 7a: Create Dummy Anomaly Data
def create_dummy_anomaly_data(normal_data, num_anomalies, noise_level=0.2):
    """Create synthetic anomalies by adding Gaussian noise to random samples.

    Samples are drawn without replacement along the first axis of
    ``normal_data``; the remaining axes may have any shape.

    Args:
        normal_data: Array-like of normal samples, indexed along axis 0.
            Values are expected in [-1, 1]; the noisy result is clipped to
            that range.
        num_anomalies: Number of samples to pick and perturb.
        noise_level: Standard deviation of the zero-mean Gaussian noise.

    Returns:
        Tuple ``(anomaly_data, anomaly_indices)``: the noisy copies and the
        indices of the samples in ``normal_data`` they were derived from.

    Raises:
        ValueError: If ``num_anomalies`` exceeds the number of samples.
    """
    normal_data = np.asarray(normal_data)
    num_samples = normal_data.shape[0]
    anomaly_indices = np.random.choice(num_samples, num_anomalies, replace=False)
    # Integer-array indexing returns a copy, so the input is never modified.
    anomaly_data = normal_data[anomaly_indices]
    noise = np.random.normal(0, noise_level, anomaly_data.shape)
    anomaly_data = np.clip(anomaly_data + noise, -1, 1)  # keep values within [-1, 1]
    return anomaly_data, anomaly_indices

# Number of synthetic anomalies to generate; adjust as needed.
num_anomalies = 50

# Materialize the normal CT images as a single NumPy array. `normal_ct_images`
# was used below without ever being assigned, which stops the cell with a
# NameError. Each dataset item is the transformed CT tensor, so the stacked
# array holds one preprocessed image per row.
normal_ct_images = np.stack([dataset[i].numpy() for i in range(len(dataset))])

# Anomalies are sampled without replacement, so cap the count at the number of
# available images.
num_anomalies = min(num_anomalies, len(normal_ct_images))
dummy_anomalies, anomaly_indices = create_dummy_anomaly_data(normal_ct_images, num_anomalies)

# Test set: all normal images followed by the synthetic anomalies.
test_data = np.concatenate([normal_ct_images, dummy_anomalies])

# Labels (1 = normal, -1 = anomaly) in the same order as test_data. They must
# not be shuffled on their own: that would detach every label from its image
# and make the AUC meaningless. The ROC metrics do not depend on sample order.
test_labels = np.concatenate([np.ones(len(normal_ct_images)), -np.ones(len(dummy_anomalies))])

#Create a test dataloader to feed into feature extraction
class TestDataset(Dataset):
    """Dataset over an in-memory array of images with normalized pixel values.

    Each item is converted back to an 8-bit PIL image so that the same
    ``transform`` used for image files can be applied to it.

    Args:
        data: Array-like of images indexed along the first axis, with pixel
            values inside ``value_range``.
        transform: Optional callable applied to each PIL image.
        value_range: ``(low, high)`` range of the values in ``data``; it is
            mapped linearly onto 0..255. Defaults to ``(-1.0, 1.0)``, the range
            ``create_dummy_anomaly_data`` clips its output to. Pass
            ``(0.0, 1.0)`` for data already scaled to [0, 1].
    """

    def __init__(self, data, transform=None, value_range=(-1.0, 1.0)):
        self.data = data
        self.transform = transform
        self.value_range = value_range

    def __len__(self):
        """Return the number of images."""
        return len(self.data)

    @staticmethod
    def _to_uint8(image, value_range=(-1.0, 1.0)):
        """Map ``image`` from ``value_range`` to uint8 0..255, dropping size-1 axes."""
        low, high = value_range
        scaled = (np.asarray(image, dtype=np.float64) - low) / (high - low) * 255.0
        # Clip before the cast: casting negative or >255 values to uint8 wraps
        # around instead of saturating, which corrupts the image.
        return np.clip(np.rint(scaled), 0, 255).astype(np.uint8).squeeze()

    def __getitem__(self, idx):
        """Return the image at ``idx`` as a PIL image (transformed if requested)."""
        image = Image.fromarray(self._to_uint8(self.data[idx], self.value_range))
        if self.transform:
            image = self.transform(image)

        return image

# Wrap the combined test array in a dataset/loader; order is kept (no shuffle)
test_dataset = TestDataset(data=test_data, transform=transform)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=16)

# Step 7b: CNN features of the test images
test_features = extract_features(dataloader=test_loader, model=cnn, device=device)

# Step 8: Apply the scaler that was fitted on the training features
test_features_scaled = scaler.transform(X=test_features)

# Step 9: One-Class SVM decision values, used as anomaly scores
ocsvm_scores = ocsvm.decision_function(X=test_features_scaled)

# Step 10: Area under the ROC curve for the One-Class SVM scores
auc = roc_auc_score(y_true=test_labels, y_score=ocsvm_scores)
print(f"AUC-ROC: {auc}")

# Step 11: Plot the ROC curve against the chance diagonal
fpr, tpr, thresholds = roc_curve(y_true=test_labels, y_score=ocsvm_scores)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f'One-Class SVM (AUC = {auc:.2f})')
ax.plot([0, 1], [0, 1], 'k--', label='Random Guessing')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend()
plt.show()



mkdir: cannot create directory ‘/root/.kaggle’: File exists
cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/darren2020/ct-to-mri-cgan
License(s): CC-BY-NC-SA-4.0
ct-to-mri-cgan.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  ct-to-mri-cgan.zip
replace data/Dataset/images/testA/ct10.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: 