In [None]:
# Record the Python version, the versions of the listed packages and (via --conda)
# the conda environment, so the results below can be tied to a software setup.
%load_ext watermark
%watermark -v -p numpy,pandas,torch,torchvision,PIL,sklearn,matplotlib,wandb,captum --conda

In [None]:
# Setting up the environment

import time
import os
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from utils.set_seed import set_seed
from sklearn.model_selection import train_test_split
from utils.data_utils import prepare_dataset
from utils.model_eval import predict, visualize_integrated_gradients_age
from warnings import filterwarnings

# Filter Warnings
# NOTE: this silences every warning for the rest of the notebook, including
# deprecation and pandas assignment warnings.
filterwarnings("ignore")

# Set seed for reproducibility
SEED = 0
set_seed(SEED)

# Get start time of the current experiment
start_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

# Set the device to GPU if available
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.cuda.get_device_name() only accepts CUDA devices, so the CPU fallback is
# reported explicitly instead of crashing on machines without a GPU.
device_name = torch.cuda.get_device_name(DEVICE) if DEVICE.type == "cuda" else "CPU"
print(f"Device: {device_name}")

In [None]:
# Loading data from csv file

# The path is assembled with os.path.join: the former "Data\ccs_dataset.csv"
# literal depended on "\c" not being a recognised escape sequence and could
# only be resolved on Windows.
df = pd.read_csv(os.path.join("Data", "ccs_dataset.csv"))
print(f"Dataset shape: {df.shape}")
df.head()

In [None]:
# Summary statistics of the loaded dataset, as a quick sanity check
df.describe()

In [None]:
# Splitting the dataset into train, validation and test sets according to the model training split strategy

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows, stratified by age group; the remaining 80% is the train set.
# NOTE(review): no random_state is passed, so the split depends on the global seed
# set through set_seed(SEED) — presumably it seeds NumPy; confirm it reproduces the
# split used for training.
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["age_group"],
)

# Halve the held-out rows into the test and validation sets, again stratified.
test_df, val_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df["age_group"],
)

# Deleting unnecessary sets to free memory
del temp_df

In [None]:
# Verify that no id appears in both the train and the test set
# (the validation set is not part of this check)

train_ids, test_ids = set(train_df["id"]), set(test_df["id"])
ids_are_disjoint = train_ids.isdisjoint(test_ids)
print(f"Train and test ids are disjoint: {ids_are_disjoint}")

In [None]:
# Visualizing the test set (called the "holdout" set in the printed output)

print(f"Holdout set shape: {test_df.shape}")
# .style wraps the five-row preview in a pandas Styler for rich rendering
test_df.head().style

In [None]:
# Preparing the test set

# Evaluation settings handed to prepare_dataset: fixed order, no augmentation.
test_loader_kwargs = dict(
    batch_size=32,
    shuffle=False,
    augment=False,
    multitask=False,
    input_size=(299, 299),
)
test_dataset = prepare_dataset(test_df, **test_loader_kwargs)

In [None]:
# Preparing the validation set

# Same evaluation settings as the test set: fixed order, no augmentation.
val_loader_kwargs = dict(
    batch_size=32,
    shuffle=False,
    augment=False,
    multitask=False,
    input_size=(299, 299),
)
val_dataset = prepare_dataset(val_df, **val_loader_kwargs)

In [None]:
# Building the model and loading the best weights

from torch import nn
from models.pytorch.architectures.InceptionV4 import (
    InceptionStem,
    InceptionA,
    InceptionB,
    InceptionC,
    ReductionA,
    ReductionB,
)


class InceptionV4(nn.Module):
    """Inception-style convolutional trunk followed by a two-layer dense head.

    The trunk chains the stem, 4 Inception-A blocks, Reduction-A, 7 Inception-B
    blocks, Reduction-B and 3 Inception-C blocks. Its output is average-pooled
    to 1x1, flattened, passed through dropout and two linear layers.

    Args:
        num_classes: Number of output units of the final linear layer.
        dropout_prob: Dropout probability applied to the pooled features.
        dense_units: Width of the hidden linear layer (1536 -> dense_units).
    """

    def __init__(self, num_classes, dropout_prob, dense_units):
        super().__init__()

        self.stem = InceptionStem()

        # 4 x Inception-A on 384 channels, then the first reduction
        self.inception_a_blocks = nn.Sequential(*(InceptionA(384) for _ in range(4)))
        self.reduction_a = ReductionA(384)

        # 7 x Inception-B on 1024 channels, then the second reduction
        self.inception_b_blocks = nn.Sequential(*(InceptionB(1024) for _ in range(7)))
        self.reduction_b = ReductionB(1024)

        # 3 x Inception-C on 1536 channels
        self.inception_c_blocks = nn.Sequential(*(InceptionC(1536) for _ in range(3)))

        # Head: global average pooling -> dropout -> two linear layers
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_prob)
        self.fc1 = nn.Linear(1536, dense_units)
        self.fc2 = nn.Linear(dense_units, num_classes)

    def forward(self, x):
        """Run the trunk and the dense head on ``x`` and return the raw outputs."""
        trunk = (
            self.stem,
            self.inception_a_blocks,
            self.reduction_a,
            self.inception_b_blocks,
            self.reduction_b,
            self.inception_c_blocks,
        )
        for stage in trunk:
            x = stage(x)

        pooled = torch.flatten(self.avgpool(x), 1)
        # No activation is applied between the two linear layers.
        hidden = self.fc1(self.dropout(pooled))
        return self.fc2(hidden)


# Creating the model: InceptionV4
model = InceptionV4(
    num_classes=1, # 1 for age regression, 2 for sexual dimorphism classification
    dropout_prob=0.7,
    dense_units=1024,
).to(DEVICE)

# This notebook only runs inference: switch Dropout (p=0.7) and any other
# train/eval-dependent layers to evaluation behaviour so outputs are deterministic.
model.eval()

# Loading the model best_weights.
# map_location remaps tensors stored for another device (e.g. a GPU checkpoint
# opened on a CPU-only machine) onto DEVICE instead of failing to deserialize.
model.load_state_dict(
    torch.load(
        "models/pytorch/weights/your_model_best_weights.pt",
        map_location=DEVICE,
    )
)


In [None]:
# Predicting the test set
# NOTE(review): assumes predict() returns one value per test_df row, in row order
# (the loader was built with shuffle=False) — confirm against utils.model_eval.

test_df["pred"] = predict(model, test_dataset, DEVICE)

In [None]:
# Predicting the validation set
# NOTE(review): assumes predict() returns one value per val_df row, in row order
# (the loader was built with shuffle=False) — confirm against utils.model_eval.

val_df["pred"] = predict(model, val_dataset, DEVICE)

In [None]:
# Test adversarial perturbations 

def fgsm_attack(image, epsilon, data_grad, clip_min=0, clip_max=1):
    """Build a Fast Gradient Sign Method (FGSM) adversarial example.

    Every element of ``image`` is moved by ``epsilon`` in the direction of the
    sign of ``data_grad`` and the result is clamped to ``[clip_min, clip_max]``.

    Args:
        image: Input tensor to perturb.
        epsilon: Step size of the perturbation; 0 adds no perturbation.
        data_grad: Gradient of the loss with respect to ``image``.
        clip_min: Lower clamp bound (default 0, the previous hard-coded value).
            Pass ``None`` to leave the lower side unclamped.
        clip_max: Upper clamp bound (default 1, the previous hard-coded value).
            Pass ``None`` to leave the upper side unclamped.

    Returns:
        The perturbed tensor, with the same shape as ``image``.
    """
    perturbed_image = image + epsilon * data_grad.sign()

    # torch.clamp needs at least one bound; with neither there is nothing to clamp.
    # Useful when inputs are normalised and do not live in [0, 1].
    if clip_min is None and clip_max is None:
        return perturbed_image
    return torch.clamp(perturbed_image, min=clip_min, max=clip_max)

def test(model, device, data_loader, epsilon):
    """Measure the average MSE of ``model`` on FGSM-perturbed inputs.

    For every batch the gradient of the MSE loss with respect to the input is
    computed, the batch is perturbed with ``fgsm_attack`` and the loss is
    evaluated again on the perturbed batch.

    Args:
        model: Regression model mapping a batch of inputs to one value per sample.
        device: Device the batches are moved to.
        data_loader: Iterable of ``(data, target)`` batches exposing ``.dataset``
            (its length is used to average the loss).
        epsilon: FGSM step size passed to ``fgsm_attack``.

    Returns:
        The sample-weighted average MSE over the perturbed inputs. The value is
        also printed.
    """
    loss_fn = nn.MSELoss()
    perturbed_total_loss = 0.0

    # Evaluate in inference mode: with layers such as Dropout left in training
    # mode the measured loss would be noisy and not comparable across epsilons.
    was_training = model.training
    model.eval()
    try:
        for data, target in data_loader:
            data = data.to(device).float()
            # Reshape the target to (batch, 1) so it matches the model output
            target = target.to(device).float().view(-1, 1)
            data.requires_grad_(True)

            output = model(data)
            model.zero_grad()
            loss = loss_fn(output, target)
            loss.backward()

            # Detach so the perturbed batch carries no autograd history
            perturbed_data = fgsm_attack(data, epsilon, data.grad.detach()).detach()

            # Only the loss value is needed here, so skip building a graph
            with torch.no_grad():
                perturbed_loss = loss_fn(model(perturbed_data), target)
            perturbed_total_loss += perturbed_loss.item() * len(target)
    finally:
        # Leave the model in the mode it was in when the function was called
        model.train(was_training)

    avg_perturbed_loss = perturbed_total_loss / len(data_loader.dataset)
    print("Epsilon: {}\tPerturbed Average Loss = {}".format(epsilon, avg_perturbed_loss))

    return avg_perturbed_loss


# Perturbation strengths to evaluate; epsilon = 0 adds no gradient step
epsilons = [0, .05, .1, .15, .2, .25, .3]

# Run test for each epsilon and collect the average perturbed loss per strength
perturbed_avg_losses = [test(model, DEVICE, test_dataset, eps) for eps in epsilons]

In [None]:
# Creating a DataFrame with the results
import pandas as pd
results_df = pd.DataFrame(
    {"Epsilon": epsilons, "Perturbed Average Loss": perturbed_avg_losses}
)

# Setting the style of the plots
sns.set_style("whitegrid")

# Loss as a function of the perturbation strength, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=results_df,
    x="Epsilon",
    y="Perturbed Average Loss",
    marker="o",
    color="blue",
    ax=ax,
)

# Adding titles and labels
ax.set_title("Effect of FGSM Attack on the Model", fontsize=14)
ax.set_xlabel("Epsilon", fontsize=12)
ax.set_ylabel("Perturbed MSE Loss", fontsize=12)

# Showing the plot
plt.show()



In [None]:
# Signed error (real age - predicted age) and its magnitude for every test image

test_df["error"] = test_df["age_in_years"] - test_df["pred"]
test_df["abs_error"] = test_df["error"].abs()

# Visualizing the test set with the predictions
test_df.head().style


In [None]:
# Signed error (real age - predicted age) and its magnitude for every validation image

val_df["error"] = val_df["age_in_years"] - val_df["pred"]
val_df["abs_error"] = val_df["error"].abs()

In [None]:
# Plotting the absolute error per age group

# Order the groups by the youngest age observed in each of them
age_groups = test_df["age_group"].unique()
min_age_per_group = {
    group: test_df.loc[test_df["age_group"] == group, "age_in_years"].min()
    for group in age_groups
}
age_groups_sorted = sorted(age_groups, key=min_age_per_group.get)

# One bar per age group; bar height is the mean absolute error of the group
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=test_df,
    x="age_group",
    y="abs_error",
    order=age_groups_sorted,
    color="seagreen",
    ax=ax,
)

plt.xticks(rotation=45)
ax.set(xlabel="Age Group", ylabel="Holdout MAE (years)")
ax.set_title("Mean Absolute Error per Age Group in Age Prediction with InceptionV4")
plt.show()


In [None]:
# Calculating MAE and MSE with 95% confidence half-widths for the validation set
# (normal approximation: 1.96 * standard error of the mean of the per-sample values)

n_val = val_df.shape[0]

mae_val = val_df["abs_error"].mean()
icse_val = 1.96 * val_df["abs_error"].std() / np.sqrt(n_val)

print(f"Validation MAE: {mae_val:.3f} ± {icse_val:.3f} years")

# The MSE is the mean of the squared errors, so its interval must be built from
# the spread of the squared errors (not of the signed errors) and is in years².
squared_error_val = val_df["error"] ** 2
mse_val = squared_error_val.mean()
icse_val = 1.96 * squared_error_val.std() / np.sqrt(n_val)

print(f"Validation MSE: {mse_val:.3f} ± {icse_val:.3f} years²")

In [None]:
# Test-set MAE with a 95% confidence half-width (1.96 * standard error of the mean)
n_test = len(test_df)
mae = test_df["abs_error"].mean()
ic = 1.96 * test_df["abs_error"].std() / np.sqrt(n_test)

print(f"MAE: {mae:.3f} ± {ic:.3f} years")


In [None]:
# Calculating MSE and IC for the test set

# The MSE is the mean of the squared errors, so the 95% confidence half-width is
# derived from the spread of the squared errors, not of the signed errors.
squared_error = test_df["error"] ** 2
mse = squared_error.mean()
ic = 1.96 * squared_error.std() / np.sqrt(len(test_df)) # 95% confidence interval

print(f"MSE: {mse:.3f} ± {ic:.3f} years²")

In [None]:
# Calculating MAE and robust measures of spread for the test set
abs_error = test_df["abs_error"]

mae = abs_error.mean()
mse = abs_error.pow(2).mean()
median_abs_error = abs_error.median()
q1, q3 = abs_error.quantile([0.25, 0.75])
iqr = q3 - q1

print(f"MAE: {mae:.3f} years")
# The squared error is expressed in years², not years
print(f"Mean Squared Error: {mse:.3f} years²")
print(f"Median absolute error: {median_abs_error:.3f} years")
print(f"25th percentile of absolute error: {q1:.3f} years")
print(f"75th percentile of absolute error: {q3:.3f} years")
print(f"IQR of absolute error: {iqr:.3f} years")

# Calculating R² and Explained Variance for the test set

from sklearn.metrics import r2_score, explained_variance_score

# Coefficient of determination and explained variance of the predicted ages
# against the real ages (real ages are passed first, predictions second)
r2 = r2_score(test_df["age_in_years"], test_df["pred"])
ev = explained_variance_score(test_df["age_in_years"], test_df["pred"])

print(f"R²: {r2:.3f}")
print(f"Explained Variance: {ev:.3f}")


In [None]:
import seaborn as sns
from sklearn.utils import resample

# Round ages to nearest integer, so the errors can be aggregated per year of age
test_df['rounded_age'] = test_df['age_in_years'].round()

# Percentile-bootstrap confidence interval of the mean
def bootstrap_ci(data, n_iterations=1000, ci=95):
    """Return the percentile-bootstrap confidence interval for the mean of ``data``.

    ``data`` is resampled ``n_iterations`` times with ``resample`` and the mean
    of every resample is recorded; the interval bounds are percentiles of those
    means.

    Args:
        data: Sample to resample; each resample must expose ``.mean()``.
        n_iterations: Number of bootstrap resamples.
        ci: Confidence level in percent.

    Returns:
        A ``(lower, upper)`` tuple with the interval bounds.
    """
    resampled_means = [resample(data).mean() for _ in range(n_iterations)]

    # Split the excluded probability mass evenly between the two tails
    tail = (100 - ci) / 2
    lower = np.percentile(resampled_means, tail)
    upper = np.percentile(resampled_means, ci + tail)
    return (lower, upper)

# Mean absolute error and bootstrap confidence interval for each rounded age
grouped = test_df.groupby("rounded_age").agg(
    {"abs_error": ["mean", lambda x: bootstrap_ci(x)]}
)
grouped.columns = ["MeanError", "ConfInterval"]

# bootstrap_ci returns (lower, upper) tuples; expand them into two numeric columns
ci_bounds = pd.DataFrame(
    grouped["ConfInterval"].tolist(),
    index=grouped.index,
    columns=["LowerCI", "UpperCI"],
)
grouped["LowerCI"] = ci_bounds["LowerCI"]
grouped["UpperCI"] = ci_bounds["UpperCI"]

# Seaborn style and colour palette (both are global settings)
sns.set(style="whitegrid")
sns.set_palette("crest")

fig, ax = plt.subplots(figsize=(10, 6))

# Mean error as a line
ax.plot(grouped.index, grouped["MeanError"], label="Mean Error")

# Confidence interval as a shaded band around the line
ax.fill_between(
    grouped.index,
    grouped["LowerCI"],
    grouped["UpperCI"],
    color=sns.color_palette("crest")[2],
    alpha=0.2,
    label="95% Confidence Interval",
)

# Labels and legend
ax.set_xlabel("Age in Years")
ax.set_ylabel("Absolute Error")
ax.set_title("Mean Absolute Error and Confidence Interval by Age")
ax.legend(loc="upper left")

# Display the figure
plt.show()


In [None]:
# Bin ages into 5-year, left-closed intervals: [0, 5), [5, 10), ...
bin_edges = np.arange(0, test_df["age_in_years"].max() + 5, 5)
bins = bin_edges
test_df["binned_age"] = pd.cut(test_df["age_in_years"], bins, right=False)

# Mean, standard deviation and sample count of the absolute error per bin
grouped = test_df.groupby("binned_age")["abs_error"].agg(
    MeanError="mean",
    StdError="std",
    Count="count",
)

# 95% confidence half-width of the mean (normal approximation)
standard_error = grouped["StdError"] / np.sqrt(grouped["Count"])
grouped["ConfInterval"] = 1.96 * standard_error

# Compute mid-points of intervals for plotting
grouped["MidPoint"] = grouped.index.map(lambda x: x.mid)

import matplotlib.pyplot as plt
import seaborn as sns

# Mean absolute error per 5-year bin with its confidence band
fig, ax = plt.subplots(figsize=(10, 6))

# Mean error as a line, positioned at the mid-point of each bin
sns.lineplot(x=grouped["MidPoint"], y=grouped["MeanError"], label="Mean Error", ax=ax)

# Confidence interval as a shaded band of mean ± half-width
lower_band = grouped["MeanError"] - grouped["ConfInterval"]
upper_band = grouped["MeanError"] + grouped["ConfInterval"]
ax.fill_between(
    grouped["MidPoint"],
    lower_band,
    upper_band,
    color=sns.color_palette("crest")[2],
    alpha=0.1,
    label="95% Confidence Interval",
)

# Labels and legend
ax.set_xlabel("Age in Years")
ax.set_ylabel("Absolute Error")
ax.set_title("Mean Absolute Error and Confidence Interval by Age")
ax.legend(loc="upper left")

# Remove the top and right spines from plot
sns.despine()

# Display the figure
plt.show()

In [None]:
# Bland-Altman analysis: average and difference between the real age and the predicted age

BA_df = test_df.copy()

BA_df['average'] = (BA_df['age_in_years'] + BA_df['pred']) / 2
BA_df['difference'] = BA_df['age_in_years'] - BA_df['pred']

# Uncomment the condition to filter patients with age between 0 and 23 years

filtered_BA_df = BA_df#[BA_df['age_in_years'] <= 23]


# Mean difference (bias) and the mean ± 1.96 SD limits drawn on the plot
mean_difference = filtered_BA_df['difference'].mean()
std_difference = filtered_BA_df['difference'].std()
upper_limit = mean_difference + 1.96*std_difference
lower_limit = mean_difference - 1.96*std_difference

# Creating the Bland-Altman plot
plt.figure(figsize=(10, 8))
plt.scatter(filtered_BA_df['average'], filtered_BA_df['difference'], alpha=0.5)

# One dashed horizontal line per reference value, labelled at the right-most average
label_x = filtered_BA_df['average'].max()
reference_lines = [
    (mean_difference, 'red', 'Mean: {:.2f}'.format(mean_difference)),
    (upper_limit, 'blue', '+1.96 SD: {:.2f}'.format(upper_limit)),
    (lower_limit, 'blue', '-1.96 SD: {:.2f}'.format(lower_limit)),
]
for y_value, line_color, line_label in reference_lines:
    plt.axhline(y_value, color=line_color, linestyle='--')
    plt.text(label_x, y_value, line_label, va='center', ha='right', backgroundcolor='w', fontsize=16)

plt.xlabel('Average between real age and predicted age', fontsize=16)
plt.ylabel('Difference between real age and predicted age', fontsize=16)
plt.show()

In [None]:
from scipy import stats

# Paired t-test between the real ages and the predicted ages of the test set.
# It tests whether the mean of the paired differences is zero, i.e. whether the
# predictions are systematically biased; it does not measure per-image agreement.
t_stat, p_value = stats.ttest_rel(BA_df['age_in_years'], BA_df['pred'])

print(f'T-statistic: {t_stat}\nP-value: {p_value}')
## P-value > 0.05: fail to reject the null hypothesis (H0) -> no significant mean difference (bias) between predicted and real ages


In [None]:
# Creating a DataFrame with the frequency of each age group in the train set and the MAE and IC of each age group in the test set

age_groups = train_df["age_group"].unique()

# Order the groups by their youngest age. The minimum is looked up in train_df,
# where the groups come from: a group missing from the test set would otherwise
# produce a NaN key and make the sort order unreliable.
min_age_per_group = {
    group: train_df.loc[train_df["age_group"] == group, "age_in_years"].min()
    for group in age_groups
}
age_groups_sorted = sorted(age_groups, key=min_age_per_group.get)

# Mean and standard deviation of the absolute error per age group (test set)
abs_error_by_group = test_df.groupby("age_group")["abs_error"]
mae_per_age_group = abs_error_by_group.mean().sort_index()
std_per_age_group = abs_error_by_group.std().sort_index()

combined_df = pd.DataFrame(
    {
        "train_freq": train_df["age_group"].value_counts().sort_index(),
        "holdout_freq": test_df["age_group"].value_counts().sort_index(),
        "holdout_mae": mae_per_age_group,
        "holdout_mae_std": std_per_age_group,
    }
)
# Drop rows with missing statistics: groups absent from the test set, and groups
# with a single test sample (their standard deviation is NaN)
combined_df = combined_df.dropna()
combined_df["holdout_freq"] = combined_df["holdout_freq"].astype(int)

# Transform age_group into a categorical variable with the correct order.
# rename_axis makes sure the index becomes a column called "age_group" even when
# the value_counts() index carries no name.
combined_df = combined_df.rename_axis("age_group").reset_index()
combined_df["age_group"] = pd.Categorical(
    combined_df["age_group"], categories=age_groups_sorted, ordered=True
)

# Order dataframe by age_group
combined_df = combined_df.sort_values("age_group")

combined_df.style

In [None]:
# Plotting the test-set error distribution to verify if it is symmetrical or if there is a bias

plt.figure(figsize=(8, 5))

# sns.distplot is deprecated; histplot with a KDE overlay and density
# normalisation draws the same kind of figure (histogram + density curve).
ax = sns.histplot(
    test_df["error"],
    kde=True,
    stat="density",
    color="seagreen",
)

plt.xlabel('Error (years)', fontsize=14)
plt.ylabel('Density', fontsize=14)
plt.title("Error Distribution in Age Prediction", fontsize=16)
plt.show()


In [None]:
# Plotting distributions of the actual ages, predicted ages, prediction errors and absolute prediction errors

# Set the style of the plots
sns.set_style("whitegrid")
fig, ax = plt.subplots(2, 2, figsize=(16, 12))

# (column, colour, title) of each panel, listed row by row
panels = [
    ("age_in_years", "blue", "Distribution of Actual Ages"),
    ("pred", "orange", "Distribution of Predicted Ages"),
    ("error", "green", "Distribution of Prediction Errors"),
    ("abs_error", "red", "Distribution of Absolute Prediction Errors"),
]

# ax.flat walks the 2x2 grid in row-major order, matching the list above
for panel_ax, (column, panel_color, panel_title) in zip(ax.flat, panels):
    sns.histplot(data=test_df, x=column, kde=True, color=panel_color, ax=panel_ax)
    panel_ax.set_title(panel_title, fontsize=14)

# Show the plots
plt.tight_layout()
plt.show()

In [None]:
# Verify clusterization of the test set predictions using KMeans with 3 clusters to test if the Human Expert feedback is similar.

from sklearn.cluster import KMeans

# Cluster the (actual age, predicted age) pairs into 3 groups with KMeans
cluster_features = test_df[["age_in_years", "pred"]]
kmeans = KMeans(n_clusters=3, random_state=0)
test_df["cluster"] = kmeans.fit_predict(cluster_features)

# Scatter plot of actual age vs. predicted age, coloured by cluster
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=test_df,
    x="age_in_years",
    y="pred",
    hue="cluster",
    palette="Set1",
    alpha=0.6,
    ax=ax,
)
ax.set_title("Actual Age vs. Predicted Age, Colored by Cluster", fontsize=14)
ax.set_xlabel("Actual Age", fontsize=12)
ax.set_ylabel("Predicted Age", fontsize=12)
plt.show()


In [None]:
# Test if the clusterization is similar using log transformation

# log(1 + x) of the actual and predicted ages
# NOTE(review): log1p is undefined for values <= -1 — confirm predictions never go that low.
for source_column, log_column in (("age_in_years", "log_age_in_years"), ("pred", "log_pred_age")):
    test_df[log_column] = np.log1p(test_df[source_column])

# K-means on the log-transformed pairs (overwrites the previous 'cluster' column)
log_features = test_df[["log_age_in_years", "log_pred_age"]]
kmeans = KMeans(n_clusters=3, random_state=0)
test_df["cluster"] = kmeans.fit_predict(log_features)

# Plot clusters
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=test_df,
    x="log_age_in_years",
    y="log_pred_age",
    hue="cluster",
    palette="Set1",
    alpha=0.6,
    ax=ax,
)
ax.set_title("Log Actual Age vs. Log Predicted Age, Colored by Cluster", fontsize=14)
ax.set_xlabel("Log Actual Age", fontsize=12)
ax.set_ylabel("Log Predicted Age", fontsize=12)
plt.show()

In [None]:
# Verify if we can find a pattern in the clusterization using a more complex approach (UMAP)

from torchvision import models, transforms
from PIL import Image
import umap.umap_ as umap

# Load the ResNet50 model with pre-trained weights
# NOTE(review): recent torchvision releases deprecate `pretrained=True` in favour
# of the `weights=` argument; kept as-is because the installed version is unknown.
resnet = models.resnet50(pretrained=True)
resnet = torch.nn.Sequential(*(list(resnet.children())[:-1]))  # Remove the last layer to get features
resnet = resnet.to(DEVICE)  # move the network once, not once per image
resnet.eval()

# Directory where your images are saved (used as a fallback location, see below)
image_dir = 'data/test_set'

# Define the image transformations - resize to 256, center-crop to 224x224
# (expected by ResNet), convert to tensor, and normalize
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Extract one feature vector per test_df row, IN ROW ORDER. The embedding is
# later attached to test_df by position, so iterating os.listdir() (arbitrary
# order, possibly a different number of files) would pair images with the wrong rows.
# NOTE(review): assumes test_df["path"] holds each image's file path, as it is
# passed to visualize_integrated_gradients_age; when that path does not exist
# the file name is looked up inside image_dir instead.
features = []
for img_path in test_df["path"]:
    img_path = str(img_path)
    if not os.path.exists(img_path):
        img_path = os.path.join(image_dir, os.path.basename(img_path))

    # The context manager closes the file handle once the image is converted
    with Image.open(img_path) as img:
        input_tensor = preprocess(img.convert('RGB'))
    input_batch = input_tensor.unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        output = resnet(input_batch)

    # Output of the truncated network (classification layer removed), flattened
    # to a 1-D feature vector
    features.append(output.cpu().numpy().flatten())

features = np.stack(features)

# Perform UMAP dimensionality reduction
reducer = umap.UMAP(n_epochs=1000, random_state=SEED)
embedding = reducer.fit_transform(features)

# Append our 2D UMAP components to this DataFrame
test_df['UMAP1'] = embedding[:, 0]
test_df['UMAP2'] = embedding[:, 1]

# Calculate correlation between UMAP components and actual/predicted age
correlation_with_umap1 = test_df[['UMAP1', 'age_in_years', 'pred']].corr()
correlation_with_umap2 = test_df[['UMAP2', 'age_in_years', 'pred']].corr()

print(correlation_with_umap1)
print(correlation_with_umap2)

# Plot UMAP components with actual age and predicted age
fig, ax = plt.subplots(1, 2, figsize=(20, 10))

sns.scatterplot(data=test_df, x='UMAP1', y='UMAP2', hue='age_in_years', ax=ax[0])
ax[0].set_title('UMAP colored by actual age')

sns.scatterplot(data=test_df, x='UMAP1', y='UMAP2', hue='pred', ax=ax[1])
ax[1].set_title('UMAP colored by predicted age')

plt.show()

In [None]:
# Testing the hdbscan clusterization algorithm based on UMAP components

import hdbscan

# Density-based clustering of the 2-D UMAP coordinates with HDBSCAN
umap_coordinates = test_df[["UMAP1", "UMAP2"]]
clusterer = hdbscan.HDBSCAN(min_samples=10, gen_min_span_tree=True)
clusterer.fit(umap_coordinates)

# Store the fitted label of every point (overwrites the previous 'cluster' column)
test_df["cluster"] = clusterer.labels_

# UMAP components coloured by cluster, with one legend entry per label
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=test_df,
    x="UMAP1",
    y="UMAP2",
    hue="cluster",
    palette="Spectral",
    legend="full",
    ax=ax,
)
ax.set_title("UMAP colored by HDBSCAN cluster")
plt.show()

In [None]:
# Plot actual age vs predicted age highlighting the clusters from HDBSCAN

plt.figure(figsize=(10, 8))
sns.scatterplot(data=test_df, x='age_in_years', y='pred', hue='cluster', palette='Set1', alpha=0.6)
# The points are coloured by the HDBSCAN cluster label, not by the anomaly flag
plt.title('Actual Age vs. Predicted Age, Colored by HDBSCAN Cluster', fontsize=14)
plt.xlabel('Actual Age', fontsize=12)
plt.ylabel('Predicted Age', fontsize=12)
plt.show()

In [None]:
# Perform anomaly detection using the UMAP components

# Squared Euclidean distance of every embedded point to the centroid of the embedding
centroid = embedding.mean(axis=0)
distances = ((embedding - centroid) ** 2).sum(axis=1)

# Points beyond the 95th percentile of that distance (the furthest 5%) are flagged
distance_threshold = np.percentile(distances, 95)
anomalies = distances > distance_threshold
test_df["anomaly"] = anomalies

# Plot actual age vs predicted age highlighting the anomalies
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=test_df,
    x="age_in_years",
    y="pred",
    hue="anomaly",
    palette="Set1",
    alpha=0.6,
    ax=ax,
)
ax.set_title("Actual Age vs. Predicted Age, Colored by Anomaly", fontsize=14)
ax.set_xlabel("Actual Age", fontsize=12)
ax.set_ylabel("Predicted Age", fontsize=12)
plt.show()


In [None]:
# Verify the sex-specific performance of the model

plt.figure(figsize=(14, 6))

# One panel per sex code: actual vs. predicted age histograms
# (the panel titles label code 0 as Male and code 1 as Female)
for panel_position, (sex_code, sex_label) in enumerate([(0, "Male"), (1, "Female")], start=1):
    sex_subset = test_df[test_df["sex"] == sex_code]

    plt.subplot(1, 2, panel_position)
    sns.histplot(sex_subset["age_in_years"], bins=20, color="blue", label="Actual Age")
    sns.histplot(sex_subset["pred"], bins=20, color="red", label="Predicted Age")
    plt.xlabel("Age")
    plt.title(f"Distribution of Actual and Predicted Age for {sex_label}")
    plt.legend()

plt.show()

In [None]:
# Per-sex summaries of the test set: sample count, mean prediction, mean absolute error
gender_counts = test_df["sex"].value_counts()
mean_pred_age_by_gender = test_df.groupby("sex")["pred"].mean()
mean_abs_error_by_gender = test_df.groupby("sex")["abs_error"].mean()

# One bar plot per summary, side by side
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# (values, title, y-axis label) of each panel, left to right
panels = [
    (gender_counts, "Count of Examples by Gender", "Count"),
    (mean_pred_age_by_gender, "Mean Predicted Age by Gender", "Mean Predicted Age"),
    (mean_abs_error_by_gender, "Mean Abs Error by Gender", "Mean Abs Error Age"),
]

# NOTE(review): the tick labels assume the bars are drawn as code 0 (Male) then
# code 1 (Female); value_counts() is ordered by frequency, so confirm the bar order.
for panel_ax, (values, panel_title, panel_ylabel) in zip(ax, panels):
    sns.barplot(x=values.index, y=values.values, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel("Gender")
    panel_ax.set_ylabel(panel_ylabel)
    panel_ax.set_xticklabels(["Male", "Female"])

plt.tight_layout()
plt.show()

In [None]:
# Scatter plot of the training frequency against the holdout MAE, one point per age group
fig, ax = plt.subplots(figsize=(12, 6))
scatter_plot = sns.scatterplot(
    data=combined_df,
    x="train_freq",
    y="holdout_mae",
    hue="age_group",
    s=100,
    palette="crest",
    legend=None,
    ax=ax,
)

# Write the age group slightly above each point (the legend is disabled)
point_labels = zip(
    combined_df["train_freq"], combined_df["holdout_mae"], combined_df["age_group"]
)
for train_freq, holdout_mae, group_label in point_labels:
    scatter_plot.text(
        train_freq,
        holdout_mae + 0.3,
        group_label,
        horizontalalignment="center",
        size="medium",
        color="black",
    )

# Pearson correlation between training frequency and holdout MAE
pearson_corr = np.corrcoef(combined_df["train_freq"], combined_df["holdout_mae"])[0, 1]

# Show the coefficient in the top-right corner, in axes coordinates
ax.text(
    0.95,
    0.95,
    f"Pearson Correlation: {pearson_corr:.2f}",
    ha="right",
    va="top",
    transform=ax.transAxes,
    bbox=dict(facecolor="white", alpha=0.8, edgecolor="black"),
    fontsize=14,
)

# Axis labels
ax.set_xlabel("Training Frequency", fontsize=16)
ax.set_ylabel("Mean Absolute Error (MAE)", fontsize=16)

plt.show()


In [None]:
# Pearson correlation between training frequency and holdout MAE within broad age ranges

# Age-group labels that make up each broad range
age_group_0_19 = ["[0, 5)", "[5, 10)", "[10, 15)", "[15, 20)"]
age_group_20_39 = ["[20, 25)", "[25, 30)", "[30, 35)", "[35, 40)"]
age_group_40_69 = ["[40, 45)", "[45, 50)", "[50, 55)", "[55, 60)", "[60, 65)", "[65, 70)"]
age_groups_70_plus = ["[70, 75)","[75, 80)", "[80, 85)", "[85, 90)", "[90, 100)"]


def _freq_mae_corr(group_labels):
    """Correlate train_freq with holdout_mae over the combined_df rows in ``group_labels``."""
    selected = combined_df[combined_df["age_group"].isin(group_labels)]
    return np.corrcoef(selected["train_freq"], selected["holdout_mae"])[0, 1]


corr_0_19 = _freq_mae_corr(age_group_0_19)
corr_20_39 = _freq_mae_corr(age_group_20_39)
corr_40_69 = _freq_mae_corr(age_group_40_69)
corr_70_plus = _freq_mae_corr(age_groups_70_plus)

# Print the correlation coefficient for each age range
for range_label, range_corr in (
    ("0-19", corr_0_19),
    ("20-39", corr_20_39),
    ("40-69", corr_40_69),
    ("70+", corr_70_plus),
):
    print(f"Correlation coefficient for age group {range_label}: {range_corr:.2f}")



In [None]:
# Sort the test set from best to worst prediction and keep one frame per age group

test_df = test_df.sort_values(by="abs_error", ascending=True)


def _rows_in_group(group_label):
    """Return the rows of the sorted test_df whose age_group equals ``group_label``."""
    return test_df[test_df["age_group"] == group_label]


test_df_0_5 = _rows_in_group("[0, 5)")
test_df_5_10 = _rows_in_group("[5, 10)")
test_df_10_15 = _rows_in_group("[10, 15)")
test_df_15_20 = _rows_in_group("[15, 20)")
test_df_20_25 = _rows_in_group("[20, 25)")
test_df_25_30 = _rows_in_group("[25, 30)")
test_df_30_35 = _rows_in_group("[30, 35)")
test_df_35_40 = _rows_in_group("[35, 40)")
test_df_40_45 = _rows_in_group("[40, 45)")
test_df_45_50 = _rows_in_group("[45, 50)")
test_df_50_55 = _rows_in_group("[50, 55)")
test_df_55_60 = _rows_in_group("[55, 60)")
test_df_60_65 = _rows_in_group("[60, 65)")
test_df_65_70 = _rows_in_group("[65, 70)")
test_df_70_75 = _rows_in_group("[70, 75)")
test_df_75_80 = _rows_in_group("[75, 80)")
test_df_80_85 = _rows_in_group("[80, 85)")
test_df_85_90 = _rows_in_group("[85, 90)")
test_df_90_100 = _rows_in_group("[90, 100)")

test_df.head(10).style

In [None]:
# Visualizing important regions of the images for predictions

img_iloc = 3 # Position in the sorted test set (-1 = worst prediction, 0 = best prediction)

# Read the selected row once and pull out the fields the visualisation needs
ig_df = test_df
selected_row = ig_df.iloc[img_iloc]
image_path = selected_row["path"]
image_id = selected_row["id"]
label = selected_row["age_in_years"]

input_size = (299, 299) # Input size of the model, for InceptionV4 is (299, 299)
# NOTE(review): output_size is defined here but the call below passes output_size=None
output_size = (720, 1475) # If none, the output size will be the same as the input size

visualize_integrated_gradients_age(
    image_path,
    image_id,
    label,
    model,
    input_size,
    device=DEVICE,
    output_size=None,
)
# visualize_gradient_shap(image_path, label, model, input_size, device=DEVICE, output_size=None)
# visualize_saliency(image_path, label, model, input_size, device=DEVICE, output_size=None)
# visualize_captum_methods(image_path, label, model, input_size, device=DEVICE, output_size=None)

In [None]:
# Save the integrated-gradients figure of every test image in a folder

input_size = (299, 299)  # Input size of the model, for InceptionV4 is (299, 299)
output_size = None  # If none, the output size will be the same as the input size

# One call per test row; save=True writes each result under save_path
for _, test_row in test_df.iterrows():
    visualize_integrated_gradients_age(
        test_row["path"],
        test_row["id"],
        test_row["age_in_years"],
        model,
        input_size,
        device=DEVICE,
        output_size=output_size,
        save=True,
        save_path="data/age_results/",
    )


In [None]:
# Save test_df with predictions and absolute error on csv file

# to_csv does not create missing directories, so make sure the target exists
os.makedirs("data/age_results", exist_ok=True)
test_df.to_csv("data/age_results/test_df.csv", index=False)