In [None]:
# Uninstall the current numpy version
!pip uninstall numpy -y

# Install numpy version 1.21.6 (which is compatible with TensorFlow 2.10)
!pip install numpy==1.21.6

# Downgrade pandas to a version compatible with numpy 1.21.6
!pip install pandas==1.3.5

# Reinstall TensorFlow to ensure compatibility
!pip uninstall tensorflow -y
!pip install tensorflow==2.10

!pip uninstall matplotlib -y
!pip install matplotlib==3.4.3

# Check that all packages are correctly installed and versions are compatible
!pip list

In [None]:
# Original imports
import tensorflow as tf
from tensorflow.keras.preprocessing import image
import numpy as np
import pandas as pd
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Additional imports for Grad-CAM
import torch
from torchvision import models, transforms
from torch.nn import functional as F
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Restore the trained Keras model from its saved-model directory.
# NOTE(review): the directory name mentions "resnet101" while the sub-folder is
# "resnet152v2_1" — confirm which architecture this checkpoint actually holds.
model_path = 'saved_model_resnet101_simple_entire_cohort_800_600_May.24th_no_masking800_600/resnet152v2_1'
model = load_model(model_path)

In [None]:
# Function to preprocess the image before inference
def preprocess_image(image_path):
    """Load one image from disk and turn it into a single-item model batch.

    The image is loaded with ``target_size=(800, 600)``, converted to an
    array, given a leading batch axis and scaled by dividing by 255.0.

    Args:
        image_path: Path of the image file to load.

    Returns:
        Array with a leading batch dimension of size 1 holding the scaled
        pixel values.
    """
    pixels = img_to_array(load_img(image_path, target_size=(800, 600)))
    batch = pixels[np.newaxis, ...]  # prepend the batch axis
    return batch / 255.0  # scale pixel values

# Function to preprocess the image for Grad-CAM (PyTorch)
def preprocess_image_for_gradcam(image_path):
    """Load an image and convert it into a normalized PyTorch batch tensor.

    The image is opened in RGB mode, resized with ``Resize((800, 600))``,
    converted to a tensor, normalized per channel and given a leading batch
    dimension.

    Args:
        image_path: Path of the image file to load.

    Returns:
        Tensor produced by the torchvision pipeline with an extra leading
        batch dimension.
    """
    # Per-channel normalization constants (presumably the ImageNet statistics).
    steps = [
        transforms.Resize((800, 600)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    pipeline = transforms.Compose(steps)

    rgb_image = Image.open(image_path).convert('RGB')  # force three channels
    return pipeline(rgb_image).unsqueeze(0)  # add batch dimension

# Function to get the model's last convolutional layer
def get_last_conv_layer(model):
    """Return the name of the last ``Conv2D`` layer in ``model.layers``.

    Only the layers listed directly in ``model.layers`` are inspected, walking
    from the end of the list towards the start.

    Args:
        model: Object exposing a ``layers`` sequence (a Keras model).

    Returns:
        The ``name`` of the last ``Conv2D`` layer, or ``None`` when the list
        contains no such layer.
    """
    conv_names = (
        layer.name
        for layer in reversed(model.layers)
        if isinstance(layer, tf.keras.layers.Conv2D)
    )
    return next(conv_names, None)

# Function to generate Grad-CAM heatmap
def generate_gradcam_heatmap(model, image_tensor, target_layer_name):
    """Compute a Grad-CAM heatmap for the first sample of ``image_tensor``.

    A helper model exposing both the target layer's activations and the model
    output is built; the gradient of output unit 0 with respect to those
    activations is averaged per channel and used to weight the activation
    maps, which are then averaged over channels, rectified and scaled.

    Args:
        model: Keras model to explain.
        image_tensor: Preprocessed input batch accepted by ``model``. Only the
            activations of the first sample are used for the heatmap.
        target_layer_name: Name of the layer whose output feeds the heatmap.

    Returns:
        2-D numpy array with the spatial size of the target layer's output.
        Values are scaled so the maximum is 1; if the rectified map has no
        positive entry it is returned as all zeros instead of dividing by
        zero.

    Raises:
        ValueError: If no gradient can be computed between the model output
            and the target layer's activations.
    """
    # `model.inputs` is already a list, so it is passed as-is rather than
    # wrapped in another list.
    grad_model = tf.keras.models.Model(
        inputs=model.inputs,
        outputs=[model.get_layer(target_layer_name).output, model.output],
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(image_tensor)
        loss = predictions[:, 0]  # score of output unit 0 for every sample

    grads = tape.gradient(loss, conv_outputs)
    if grads is None:
        raise ValueError(
            f"Could not compute gradients of the model output with respect to "
            f"layer '{target_layer_name}'."
        )

    # One importance weight per channel: mean gradient over batch and space.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2)).numpy()
    feature_maps = conv_outputs.numpy()[0]

    # Broadcasting weights every channel at once (replaces the per-channel loop).
    heatmap = np.mean(feature_maps * pooled_grads, axis=-1)
    heatmap = np.maximum(heatmap, 0)

    # Scale to [0, 1]; skip when there is no positive activation so the result
    # is a clean all-zero map rather than NaNs from a 0/0 division.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap

# Function to apply Grad-CAM to the image and display the results
def apply_gradcam(image_path, model, target_layer_name):
    """Overlay a Grad-CAM heatmap on the image at ``image_path`` and show it.

    Args:
        image_path: Path of the image to explain.
        model: Keras model forwarded to ``generate_gradcam_heatmap``.
        target_layer_name: Name of the layer whose activations drive the
            heatmap (for example the value returned by
            ``get_last_conv_layer``).

    Raises:
        ValueError: If ``target_layer_name`` is ``None``.
    """
    if target_layer_name is None:
        raise ValueError(
            "No target layer name was given for Grad-CAM "
            "(the model may not contain a top-level Conv2D layer)."
        )

    # The heatmap is computed on the same preprocessed input used for
    # inference. The separate PyTorch-style preprocessing that used to run
    # here produced a tensor that was never used, so it has been dropped.
    image_tensor = preprocess_image(image_path)
    heatmap = generate_gradcam_heatmap(model, image_tensor, target_layer_name)

    # Make the uint8 conversion well defined even if the heatmap carries
    # NaN/inf or out-of-range values.
    heatmap = np.clip(np.nan_to_num(heatmap, nan=0.0, posinf=1.0, neginf=0.0), 0.0, 1.0)
    heatmap = np.uint8(255 * heatmap)

    # The context manager closes the image file once it has been drawn.
    with Image.open(image_path) as img:
        # Stretch the low-resolution heatmap to the image's (width, height).
        heatmap = np.array(Image.fromarray(heatmap).resize(img.size))
        plt.imshow(img)

    plt.imshow(heatmap, cmap='jet', alpha=0.5)  # Overlay the heatmap
    plt.axis('off')
    plt.show()

# Function to predict the class of an image
def predict(image_path, show_gradcam=True):
    """Run the model on one image and optionally display its Grad-CAM overlay.

    Uses the module-level ``model`` for both the prediction and the Grad-CAM
    visualisation.

    Args:
        image_path: Path of the image to classify.
        show_gradcam: When True (the default, matching the previous
            behaviour) a Grad-CAM overlay is rendered for the image. Pass
            False to skip the visualisation, e.g. when scoring many images.

    Returns:
        The value returned by ``model.predict`` for the single-image batch.
    """
    processed_img = preprocess_image(image_path)
    # verbose=0 avoids printing one progress bar per image.
    prediction = model.predict(processed_img, verbose=0)

    if show_gradcam:
        # Generate and display Grad-CAM heatmap
        target_layer = get_last_conv_layer(model)
        apply_gradcam(image_path, model, target_layer)

    return prediction



In [None]:
# Load CSV files containing image paths and labels
neg_path = 'NEGATIVE_GROUP_datathon.csv'
pos_path = 'POSITIVE_GROUP_datathon.csv'

negative_df = pd.read_csv(neg_path)
positive_df = pd.read_csv(pos_path)

# Add labels to the dataframes: 0 for negative, 1 for positive
negative_df['label'] = 0
positive_df['label'] = 1

# Combine the negative and positive dataframes
all_data = pd.concat([negative_df, positive_df], axis=0)

# Update image paths to match new directory structure.
# regex=False: the prefix is a literal string, and the default value of
# `regex` differs between pandas versions, so it is stated explicitly.
all_data['png_path'] = all_data['png_path'].str.replace(
    '/data/mammo/', '/fsx1/emory-mammo/images/', regex=False
)

# Drop rows with missing diagnosis (asses_dx) values
all_data = all_data.dropna(subset=['asses_dx'])

# Filter rows with "A" in asses and either "B" or "S" in asses_dx
all_data = all_data[(all_data['asses'] == 'A') & (all_data['asses_dx'].isin(['B', 'S']))]

# Separate positive and negative labels
positive_data = all_data[all_data['label'] == 1]
negative_data = all_data[all_data['label'] == 0]

# Size of each class in the balanced test set and the seed that makes the
# draw reproducible.
SAMPLES_PER_CLASS = 500
SAMPLE_SEED = 42

# Fail early with an explicit message when a group is too small to sample from.
if min(len(positive_data), len(negative_data)) < SAMPLES_PER_CLASS:
    raise ValueError(
        f"Need at least {SAMPLES_PER_CLASS} rows per class after filtering, "
        f"got {len(positive_data)} positive and {len(negative_data)} negative."
    )

# Select the same number of samples from each group
positive_sample = positive_data.sample(n=SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
negative_sample = negative_data.sample(n=SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)

# Combine the samples to create the test set
test = pd.concat([positive_sample, negative_sample], axis=0)

In [None]:
# Import metrics functions for evaluation (kept in this cell so the names are
# available to the evaluation code that follows)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Probability at or above which an image is labelled positive
DECISION_THRESHOLD = 0.5

# Perform inference on all images in the test set
results = []
for image_path, true_label in zip(test['png_path'], test['label']):
    prediction = predict(image_path)  # Predict the class
    results.append({
        'png_path': image_path,  # Store the image path (key was misspelled 'png_ppath')
        'true_label': true_label,  # Store the true label
        'predicted_prob': prediction[0][0]  # Store the predicted probability
    })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)
print(results_df.head())  # Display the first few rows of the results

# Convert predicted probabilities to binary predictions with a vectorized
# comparison against the decision threshold
results_df['predicted_label'] = (results_df['predicted_prob'] >= DECISION_THRESHOLD).astype(int)

In [None]:
# Score the predictions against the ground truth.
y_true = results_df['true_label']
y_pred = results_df['predicted_label']
y_score = results_df['predicted_prob']

# Threshold-based metrics use the binary labels; AUC uses the raw probabilities.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
auc = roc_auc_score(y_true, y_score)

# Collect everything under display names and print the summary.
metrics = dict(zip(
    ('Accuracy', 'Precision', 'Recall', 'F1 Score', 'AUC'),
    (accuracy, precision, recall, f1, auc),
))

print(metrics)

# Report the class balance and overall shape of the test set.
print(f"Number of positive labels in test set: {test['label'].sum()}")
print(f"Test set shape: {test.shape}")

# Labels are 0/1, so the negatives are whatever is left after the positives.
num_negative_labels = len(test) - test['label'].sum()
print(f"Number of negative labels in test set: {num_negative_labels}")
