In [None]:
import os
import numpy as np
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torch.utils.data as data
import torchvision.transforms as transforms
from torchvision.models import resnet18, ResNet18_Weights
import torch.nn.functional as F
from pydicom import Dataset
from tqdm import tqdm
from medmnist import BreastMNIST
from medmnist import INFO
import pennylane as qml
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc, confusion_matrix


In [None]:
# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ready else torch.device("cpu")

# Report which device was selected.
if not cuda_ready:
    print("CUDA is not available. Using CPU.")
else:
    print(f"Using: {torch.cuda.get_device_name(0)}")
    print(f"CUDA: {torch.version.cuda}")

In [None]:
# Dataset metadata is looked up in medmnist's INFO table by its flag.
data_flag = 'breastmnist'
info = INFO[data_flag]
DataClass = BreastMNIST

task, n_channels = info['task'], info['n_channels']
n_classes = len(info['label'])

print(f"Number of classes:", n_classes)
print(f"Number of channels: {n_channels}")


def _tensor_steps():
    """Return the steps shared by both pipelines.

    Converts the image to a tensor, normalizes it with mean 0.5 / std 0.5 and
    finally prepends a singleton axis (the loops later remove it again with
    ``squeeze(1)`` on the batch).
    """
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
        lambda x: x.unsqueeze(0),
    ]


# Training pipeline: random flip + rotation augmentation, then the shared steps.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    *_tensor_steps(),
])

# Evaluation pipeline: the shared steps only, no augmentation.
eval_transform = transforms.Compose(_tensor_steps())

In [None]:
# Image side length requested from the dataset class.
size = 28

# Options shared by every split; each split is downloaded on demand.
dataset_kwargs = dict(download=True, size=size)

# Only the training split uses the augmenting transform.
data_train = DataClass(split='train', transform=train_transform, **dataset_kwargs)
data_test = DataClass(split='test', transform=eval_transform, **dataset_kwargs)
data_eval = DataClass(split='val', transform=eval_transform, **dataset_kwargs)

In [None]:
batch_size = 32

# Only the training loader shuffles; test and validation keep dataset order.
dataloader_train = DataLoader(data_train, batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader(data_test, batch_size=batch_size, shuffle=False)
dataloader_eval = DataLoader(data_eval, batch_size=batch_size, shuffle=False)

# Split sizes, for a quick sanity check.
print(f"\nNumber of images in training dataset: {len(data_train)}")
print(f"Number of images in test dataset: {len(data_test)}")
print(f"Number of images in validation dataset: {len(data_eval)}")

In [None]:
from torchvision.models import resnet18, ResNet18_Weights

# ResNet-18 backbone initialised from torchvision's default pretrained weights.
model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Fine-tune the whole network: every parameter is left trainable
# (nothing is frozen).
model.requires_grad_(True)

# Replace ResNet-18's final fully connected layer with a small MLP head that
# produces one sigmoid-activated score per image.
model.fc = nn.Sequential(
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 1),
    nn.Sigmoid(),
)
print(model)
model = model.to(device)

# The head already applies a sigmoid, so the loss is plain BCE on probabilities.
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCELoss().to(device)
epochs = 400

In [None]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Per-epoch history; the plotting cells further down read these lists.
train_losses, train_accuracies = [], []
val_losses, val_accuracies = [], []
val_precisions, val_recalls, val_f1_scores, val_aucs = [], [], [], []

for epoch in range(epochs):
    print(f"Epoch: {epoch+1}/{epochs}")

    # ---- Training ----
    model.train()
    total_loss = 0.0
    train_correct, train_seen = 0, 0
    for inputs, labels in dataloader_train:
        # squeeze(1) drops the extra axis added by the transform's
        # unsqueeze(0); repeat() then tiles the single channel three times
        # to match the 3-channel input of the ResNet backbone.
        inputs = inputs.squeeze(1).to(device).repeat(1, 3, 1, 1)
        # reshape(-1) rather than squeeze(): squeeze() would turn a batch
        # holding a single sample into a 0-d tensor.
        labels = labels.reshape(-1).to(device).float()

        optimizer.zero_grad()
        outputs = model(inputs).reshape(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Count hits per sample so a smaller final batch is not over-weighted.
        train_correct += (outputs.round() == labels).sum().item()
        train_seen += labels.numel()

    train_losses.append(total_loss / len(dataloader_train))
    train_accuracies.append(train_correct / train_seen)

    print(f"Train Loss: {train_losses[-1]:.4f} | Train Acc: {train_accuracies[-1]:.4f}")

    # ---- Validation ----
    model.eval()
    val_loss = 0.0
    val_labels, val_predictions, val_probs = [], [], []

    with torch.no_grad():
        for inputs, labels in dataloader_eval:
            inputs = inputs.squeeze(1).to(device).repeat(1, 3, 1, 1)
            labels = labels.reshape(-1).to(device).float()

            outputs = model(inputs).reshape(-1)
            val_loss += criterion(outputs, labels).item()

            # model.fc ends in nn.Sigmoid, so `outputs` already are
            # probabilities. Applying sigmoid a second time would squash them
            # into [0.5, 0.73] and make round() predict the positive class
            # for (almost) every sample.
            probs = outputs
            val_labels.extend(labels.cpu().numpy())
            val_predictions.extend(probs.round().cpu().numpy())
            val_probs.extend(probs.cpu().numpy())

    val_losses.append(val_loss / len(dataloader_eval))
    val_accuracies.append(accuracy_score(val_labels, val_predictions))
    val_precision = precision_score(val_labels, val_predictions, average='weighted', zero_division=0)
    val_recall = recall_score(val_labels, val_predictions, average='weighted', zero_division=0)
    val_f1 = f1_score(val_labels, val_predictions, average='weighted', zero_division=0)
    auc_val = roc_auc_score(val_labels, val_probs)

    val_precisions.append(val_precision)
    val_recalls.append(val_recall)
    val_f1_scores.append(val_f1)
    val_aucs.append(auc_val)

    print(f"Val Loss: {val_losses[-1]:.4f} | Acc: {val_accuracies[-1]:.4f} | Precision: {val_precision*100:.4f} | Recall: {val_recall*100:.4f} | F1: {val_f1*100:.4f} | AUC: {auc_val*100:.4f}")

    # Overwrite the checkpoint every epoch so the latest weights survive an
    # interrupted run.
    torch.save(model.state_dict(), "last_model.pth")

print("Saved 'last_model.pth'")


In [None]:
# Confusion matrix for the validation labels/predictions left over from the
# last epoch of the training loop.
conf_matrix = confusion_matrix(val_labels, val_predictions)

class_names = ['Negativo', 'Positivo']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predito')
ax.set_ylabel('Real')
ax.set_title('Matriz de Confusão')
plt.show()

In [None]:
# ROC curve for the validation split, built from the labels and scores
# gathered during the last validation pass of the training loop. The curve
# points have to be computed here: nothing else in the notebook defines them.
false_positive_rate_v, true_positive_rate_v, thresholds_v = roc_curve(val_labels, val_probs)

plt.figure()
plt.plot(false_positive_rate_v, true_positive_rate_v, color='blue', lw=2, label=f'ROC curve (AUC = {auc_val:.2f})')
# Diagonal reference line: the ROC of a random classifier.
plt.plot([0, 1], [0, 1], color='grey', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.grid()
plt.show()

In [None]:
# The x-axes are derived from the recorded histories instead of `epochs`, so
# the plots still work when training was interrupted early or `epochs` was
# changed after the run (mismatched lengths make plt.plot raise).
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_losses) + 1), train_losses, label="Training Loss", marker='o')
plt.plot(range(1, len(val_losses) + 1), val_losses, label="Validation Loss", marker='x')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training and Validation Loss Over Epochs")
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(10, 6))
plt.plot(range(1, len(val_accuracies) + 1), val_accuracies, label="Validation Accuracy", marker='s', color='g')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Validation Accuracy Over Epochs")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Training loss on its own. The x-axis follows the number of recorded epochs
# rather than `epochs`, so the plot also works after an interrupted run.
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_losses) + 1), train_losses, label="Training Loss", marker='o')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training Loss Over Epochs")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Final evaluation on the held-out test split.
test_loss = 0.0
test_labels, test_predictions, test_probs = [], [], []

model.eval()
with torch.no_grad():
    for inputs, labels in dataloader_test:
        # Drop the extra axis added by the transform, then tile the single
        # channel three times for the ResNet backbone.
        inputs = inputs.squeeze(1).to(device).repeat(1, 3, 1, 1)
        # reshape(-1) rather than squeeze(): squeeze() would turn a batch
        # holding a single sample into a 0-d tensor.
        labels = labels.reshape(-1).to(device).type(torch.float)

        outputs = model(inputs)  # one column per sample: the head ends in Linear(128, 1) + Sigmoid
        loss = criterion(outputs.reshape(-1), labels)
        test_loss += loss.item()

        # The head already applies a sigmoid, so `outputs` are the
        # probabilities; a second sigmoid would compress every score (and
        # therefore every ROC threshold) into [0.5, 0.73].
        probs = outputs

        test_labels.extend(labels.cpu().numpy())
        test_predictions.extend(probs.round().cpu().numpy())
        test_probs.extend(probs.cpu().numpy())

# Predictions and scores keep one column per sample, i.e. shape (N, 1).
test_labels = np.array(test_labels)
test_predictions = np.array(test_predictions)
test_probs = np.array(test_probs)


test_accuracy = accuracy_score(test_labels, test_predictions)
test_precision = precision_score(test_labels, test_predictions, average="weighted", zero_division=0)
test_recall = recall_score(test_labels, test_predictions, average="weighted", zero_division=0)
test_f1 = f1_score(test_labels, test_predictions, average="weighted", zero_division=0)
test_auc = roc_auc_score(test_labels, test_probs[:, 0])

print("\nFinal Test Evaluation:")
print(f"Test Loss: {test_loss / len(dataloader_test):.4f}")
print(f"Test Accuracy: {test_accuracy*100:.4f}")
print(f"Test Precision: {test_precision*100:.4f}")
print(f"Test Recall: {test_recall*100:.4f}")
print(f"Test F1 Score: {test_f1*100:.4f}")
print(f"Test AUC: {test_auc*100:.4f}")


In [None]:
# ROC curve on the test split. The scores are flattened explicitly so the
# call works whether `test_probs` is a column (N, 1) or already 1-D.
false_positive_rate, true_positive_rate, thresholds = roc_curve(
    test_labels, np.ravel(test_probs)
)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.figure()
plt.plot(false_positive_rate, true_positive_rate, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line: the ROC of a random classifier.
plt.plot([0, 1], [0, 1], color='grey', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.grid()
plt.show()

# Export the curve points so they can be re-plotted outside the notebook.
dataset_name = "BreastMNIST"
roc_data = pd.DataFrame({
    'Dataset': [dataset_name] * len(false_positive_rate),
    'False Positive Rate': false_positive_rate,
    'True Positive Rate': true_positive_rate,
    'Thresholds': thresholds
})

# NOTE(review): machine-specific absolute path, and the folder is named
# 224x224 although the images are loaded with size = 28 -- confirm the
# intended destination.
roc_output_dir = '/home/eflammere/BreastCancerQuanvolution/Classic/checkpoints/BreastMNIST/224x224'
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(roc_output_dir, exist_ok=True)
roc_data.to_csv(os.path.join(roc_output_dir, f'roc_curve_data_{dataset_name}.csv'), index=False)

print(f"ROC curve data exported to 'roc_curve_data_{dataset_name}.csv'")

In [None]:
# Confusion matrix of the test-split labels against the model's predictions.
conf_matrix = confusion_matrix(test_labels, test_predictions)

class_names = ['Negativo', 'Positivo']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predito')
ax.set_ylabel('Real')
ax.set_title('Matriz de Confusão')
plt.show()