# Import

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval
import spacy
import re


# Setup

In [None]:
# Set seeds
# Seeds NumPy's global RNG. The regex baseline further down falls back to
# np.random.choice when no rule matches, so its guesses are only repeatable
# when the notebook is run top to bottom after this cell.
np.random.seed(42)

# Read data

In [None]:
# The "accept" column is parsed with ast.literal_eval while reading, turning
# its text into a Python object (presumably a list of labels, since the next
# cell explodes it -- TODO confirm against the CSV).
accept_parser = {"accept": literal_eval}
df_train = pd.read_csv("metastatic_local_train.csv", converters=accept_parser)
df_test = pd.read_csv("metastatic_local_test.csv", converters=accept_parser)

In [None]:
# Turn the "accept" column into one indicator column per distinct value:
# explode to one value per row, one-hot encode, then sum the indicators back
# per original row (index level 0) and attach them to the frame.
train_accept_indicators = (
    pd.get_dummies(df_train["accept"].explode()).groupby(level=0).sum()
)
df_train = df_train.join(train_accept_indicators)

test_accept_indicators = (
    pd.get_dummies(df_test["accept"].explode()).groupby(level=0).sum()
)
df_test = df_test.join(test_accept_indicators)


# Dataset characteristics

In [None]:
# Heatmap of how LOCAL and METASTATIC co-occur in the training set
fig = plt.figure(figsize=(5, 5))
train_cross_tab = pd.crosstab(df_train["LOCAL"], df_train["METASTATIC"])

ax = sns.heatmap(train_cross_tab, annot=True, cmap="Blues", fmt="d")
ax.set_xlabel("Metastatic disease allowed")
ax.set_ylabel("Localized disease allowed")
# Relabel the two tick positions (0.5 and 1.5) as No / Yes
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed after drawing (rather than never created) so the
# heatmap keeps the width it was laid out with
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/training_characteristics.png", dpi=300, bbox_inches="tight")

In [None]:
# Calculate percentages of respective fields in the training set.
# train_cross_tab is indexed as [LOCAL value, METASTATIC value].

n_train = df_train.shape[0]

metastatic_and_local_train = 100 * train_cross_tab.loc[1, 1] / n_train
metastatic_not_local_train = 100 * train_cross_tab.loc[0, 1] / n_train
local_not_metastatic_train = 100 * train_cross_tab.loc[1, 0] / n_train
neither_train = 100 * train_cross_tab.loc[0, 0] / n_train

# The marginal percentages are simply the sums of the joint percentages above
# (the previous "100 * (...) / 100" scaling was a no-op and has been dropped).
local_train = metastatic_and_local_train + local_not_metastatic_train
metastatic_train = metastatic_and_local_train + metastatic_not_local_train

print(f"Percentage of trials that allow both metastatic and localized disease: {metastatic_and_local_train:.2f}%")
print(f"Percentage of trials that allow metastatic disease but not localized disease: {metastatic_not_local_train:.2f}%")
print(f"Percentage of trials that allow localized disease but not metastatic disease: {local_not_metastatic_train:.2f}%")
print(f"Percentage of trials that allow neither metastatic nor localized disease: {neither_train:.2f}%")
print(f"Percentage of trials that allow localized disease: {local_train:.2f}%")
print(f"Percentage of trials that allow metastatic disease: {metastatic_train:.2f}%")

In [None]:
# Relative frequency (fractions summing to 1) of each METASTATIC value in the training set
df_train["METASTATIC"].value_counts(normalize=True)

In [None]:
# Relative frequency (fractions summing to 1) of each LOCAL value in the training set
df_train["LOCAL"].value_counts(normalize=True)

In [None]:
# Heatmap of how LOCAL and METASTATIC co-occur in the test set
fig = plt.figure(figsize=(5, 5))
test_cross_tab = pd.crosstab(df_test["LOCAL"], df_test["METASTATIC"])
ax = sns.heatmap(test_cross_tab, annot=True, cmap="Blues", fmt="d")
ax.set_xlabel("Metastatic disease allowed")
ax.set_ylabel("Localized disease allowed")
# Relabel the two tick positions (0.5 and 1.5) as No / Yes
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed after drawing (rather than never created) so the
# heatmap keeps the width it was laid out with
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/test_characteristics.png", dpi=300, bbox_inches="tight")

In [None]:
# Calculate percentages of respective fields in the test set.
# test_cross_tab is indexed as [LOCAL value, METASTATIC value].

n_test = df_test.shape[0]

metastatic_and_local_test = 100 * test_cross_tab.loc[1, 1] / n_test
metastatic_not_local_test = 100 * test_cross_tab.loc[0, 1] / n_test
local_not_metastatic_test = 100 * test_cross_tab.loc[1, 0] / n_test
neither_test = 100 * test_cross_tab.loc[0, 0] / n_test

# The marginal percentages are simply the sums of the joint percentages above
# (the previous "100 * (...) / 100" scaling was a no-op and has been dropped).
local_test = metastatic_and_local_test + local_not_metastatic_test
metastatic_test = metastatic_and_local_test + metastatic_not_local_test

print(f"Percentage of trials that allow both metastatic and localized disease: {metastatic_and_local_test:.2f}%")
print(f"Percentage of trials that allow metastatic disease but not localized disease: {metastatic_not_local_test:.2f}%")
print(f"Percentage of trials that allow localized disease but not metastatic disease: {local_not_metastatic_test:.2f}%")
print(f"Percentage of trials that allow neither metastatic nor localized disease: {neither_test:.2f}%")
print(f"Percentage of trials that allow localized disease: {local_test:.2f}%")
print(f"Percentage of trials that allow metastatic disease: {metastatic_test:.2f}%")


In [None]:
# Relative frequency (fractions summing to 1) of each METASTATIC value in the test set
df_test["METASTATIC"].value_counts(normalize=True)

In [None]:
# Relative frequency (fractions summing to 1) of each LOCAL value in the test set
df_test["LOCAL"].value_counts(normalize=True)

In [None]:
# Pie chart of how the training trials are spread over the "family" values.
# The chart itself is drawn through pandas' plotting API; seaborn is only used
# to select the "Blues" palette.
fig = plt.figure(figsize=(5, 5))
sns.set_palette("Blues")
train_family_counts = df_train["family"].value_counts()
ax = train_family_counts.plot.pie(autopct="%1.1f%%")
ax.set_ylabel("")  # blank out the y label
plt.show()

fig.savefig("plots/family_distribution_train.png", dpi=300, bbox_inches="tight")

In [None]:
# Pie chart of how the test trials are spread over the "family" values.
# The chart itself is drawn through pandas' plotting API; seaborn is only used
# to select the "Blues" palette.
fig = plt.figure(figsize=(5, 5))
sns.set_palette("Blues")
test_family_counts = df_test["family"].value_counts()
ax = test_family_counts.plot.pie(autopct="%1.1f%%")
ax.set_ylabel("")  # blank out the y label
plt.show()

fig.savefig("plots/family_distribution_test.png", dpi=300, bbox_inches="tight")


# Testing

## Make ML predictions

In [None]:
# Load the trained spaCy pipeline from its local directory
model_path = "./metastatic_local_publication_v1/model-best"
metastatic_local_model = spacy.load(model_path)

In [None]:
# Predict on the test set
def predict(text):
    """Run `metastatic_local_model` on `text` and return the result's `cats` attribute.

    Downstream cells index the returned object with the keys "LOCAL" and
    "METASTATIC" and compare the values against 0.5.
    """
    return metastatic_local_model(text).cats

# Score every test text once and keep the raw category scores per row
df_test["ML_prediction"] = df_test["text"].apply(predict)


def _score_to_flag(label):
    """Return a function mapping a score dict to 1 if `label` scores above 0.5, else 0."""

    def to_flag(scores):
        return 1 if scores[label] > 0.5 else 0

    return to_flag


# Create binary columns from the prediction dictionary
df_test["LOCAL_PREDICTED_ML"] = df_test["ML_prediction"].apply(_score_to_flag("LOCAL"))
df_test["METASTATIC_PREDICTED_ML"] = df_test["ML_prediction"].apply(
    _score_to_flag("METASTATIC")
)

## Make regex predictions

In [None]:
def predict_metastatic_with_regex(title):
    """Guess from a trial title whether metastatic disease is allowed.

    The title is lower-cased and checked against an ordered list of keyword
    rules; the first rule that fires decides the result.

    Args:
        title: Trial title as a string.

    Returns:
        0 or 1 from the first matching rule. When no rule matches, a random
        0/1 drawn with ``np.random.choice`` (NumPy's global RNG).
    """
    text = title.lower()

    def has(pattern):
        return re.search(pattern, text) is not None

    # An explicit "non-metastatic" / "nonmetastatic" / "non metastatic" wins
    if has(r"non[ -]?metast"):
        return 0

    # Any other mention of "metasta..." counts, unless the title contains
    # "metastasis-free" / "metastases-free". A "non-metasta..." title cannot
    # reach this point because it already returned above.
    if has(r"metasta") and not has(r"metastas[ie]s[- ]free"):
        return 1

    # A standalone "iv" token is read as stage IV
    if has(r"\biv\b"):
        return 1

    # A stage is named, and nothing in the title hints at IV / 4,
    # extensive-stage or advanced-stage
    late_stage_hints = (
        r"\biv",
        r"extensive[- ]stage",
        r"advanced[- ]stage",
        r"\b4\b",
    )
    if has(r"\bstage\b") and not any(has(hint) for hint in late_stage_hints):
        return 0

    # "advanced" counts unless it is "locally advanced"
    if has(r"advanced") and not has(r"locally[- ]advanced"):
        return 1

    if has(r"extensive[- ]stage"):
        return 1

    # No rule matched: make a random guess
    return np.random.choice([0, 1])


def predict_local_with_regex(title):
    """Guess from a trial title whether localized disease is allowed.

    The title is lower-cased and searched for any of a set of keywords that
    all indicate localized disease.

    Args:
        title: Trial title as a string.

    Returns:
        1 if any keyword matches. Otherwise a random 0/1 drawn with
        ``np.random.choice`` (NumPy's global RNG).
    """
    title = title.lower()

    local_patterns = (
        # Early breast and "or metastatic"
        r"\bearly breast",
        r"\bor metast",
        # Stages other than IV. A negative lookahead is used instead of
        # "[^v]" so that a title ending in "stage i" also matches: "[^v]"
        # required one more character after the "i".
        r"stage i(?!v)",
        # Non-metastatic
        r"non[ -]?metast",
        # Locally advanced / locoregional
        r"locally[- ]advanced",
        r"locoregional",
    )
    # Every rule yields the same label, so their order does not matter
    if any(re.search(pattern, title) for pattern in local_patterns):
        return 1

    # Make a random guess if none of the above conditions are met
    return np.random.choice([0, 1])


# Both helpers fall back to np.random.choice (NumPy's global RNG) when no rule
# matches, so the order of these two statements -- and running them exactly
# once after seeding -- determines which random guesses each column receives.
df_test["METASTATIC_PREDICTED_REGEX"] = df_test["title"].apply(predict_metastatic_with_regex)
df_test["LOCAL_PREDICTED_REGEX"] = df_test["title"].apply(predict_local_with_regex)

## Refine ML predictions with regex

In [None]:
def refine_metastatic_with_regex(row):
    """Override the ML METASTATIC prediction when the title clearly contradicts it.

    Args:
        row: Mapping-like record with a "title" string and a
            "METASTATIC_PREDICTED_ML" value.

    Returns:
        0 if the ML label is 1 but a "not metastatic" title rule fires,
        1 if the ML label is 0 but a "metastatic" title rule fires,
        otherwise the ML label unchanged.
    """
    text = row["title"].lower()
    ml_label = row["METASTATIC_PREDICTED_ML"]

    def has(pattern):
        return re.search(pattern, text) is not None

    if ml_label == 1:
        # Rules that can only flip a positive prediction to negative
        if has(r"non[ -]?metast"):
            return 0

        # A stage is named and nothing hints at IV / 4, extensive-stage or
        # advanced-stage
        late_stage_hints = (
            r"\biv",
            r"extensive[- ]stage",
            r"advanced[- ]stage",
            r"\b4\b",
        )
        if has(r"\bstage\b") and not any(has(hint) for hint in late_stage_hints):
            return 0
    elif ml_label == 0:
        # Rules that can only flip a negative prediction to positive; they all
        # produce the same result, so they are evaluated as one disjunction.
        mentions_metastasis = (
            has(r"metasta")
            and not has(r"non[- ]?metasta")
            and not has(r"metastas[ie]s[- ]free")
        )
        mentions_stage_iv = has(r"\biv\b")
        mentions_advanced = has(r"advanced") and not has(r"locally[- ]advanced")
        mentions_extensive = has(r"extensive[- ]stage")
        if (
            mentions_metastasis
            or mentions_stage_iv
            or mentions_advanced
            or mentions_extensive
        ):
            return 1

    # No rule contradicted the model: keep the ML prediction
    return ml_label


def refine_local_with_regex(row):
    """Upgrade a negative ML LOCAL prediction when the title indicates localized disease.

    Args:
        row: Mapping-like record with a "title" string and a
            "LOCAL_PREDICTED_ML" value.

    Returns:
        1 if the ML label is 0 and any localized-disease keyword is found in
        the lower-cased title; otherwise the ML label unchanged. A positive ML
        prediction is never changed.
    """
    title = row["title"].lower()
    ml_label = row["LOCAL_PREDICTED_ML"]

    local_patterns = (
        # Early breast and "or metastatic"
        r"\bearly breast",
        r"\bor metast",
        # Stages other than IV. A negative lookahead is used instead of
        # "[^v]" so that a title ending in "stage i" also matches: "[^v]"
        # required one more character after the "i".
        r"stage i(?!v)",
        # Non-metastatic
        r"non[ -]?metast",
        # Locally advanced / locoregional
        r"locally[- ]advanced",
        r"locoregional",
    )
    if ml_label == 0 and any(re.search(pattern, title) for pattern in local_patterns):
        return 1

    # Return the ML prediction if no conditions are met
    return ml_label

# Apply the row-wise refinements (ML prediction corrected by title keywords)
df_test["METASTATIC_PREDICTED_COMBINED"] = df_test.apply(
    refine_metastatic_with_regex, axis="columns"
)
df_test["LOCAL_PREDICTED_COMBINED"] = df_test.apply(
    refine_local_with_regex, axis="columns"
)

## Evaluate performance

In [None]:
# Collapse each (LOCAL, METASTATIC) pair of 0/1 columns into one four-class
# label column: once for the ground truth and once per prediction method.
# Rows whose pair matches none of the four combinations are left unset (NaN).
combined_label_by_flags = {
    # (LOCAL value, METASTATIC value): label
    (1, 0): "Local_only",
    (0, 1): "Metastatic_only",
    (1, 1): "Both",
    (0, 0): "Neither",
}

label_column_sources = [
    # (new label column, LOCAL source column, METASTATIC source column)
    ("ALL_LABELS", "LOCAL", "METASTATIC"),
    (
        "ALL_LABELS_PREDICTIONS_REGEX",
        "LOCAL_PREDICTED_REGEX",
        "METASTATIC_PREDICTED_REGEX",
    ),
    (
        "ALL_LABELS_PREDICTIONS_ML",
        "LOCAL_PREDICTED_ML",
        "METASTATIC_PREDICTED_ML",
    ),
    (
        "ALL_LABELS_PREDICTIONS_COMBINED",
        "LOCAL_PREDICTED_COMBINED",
        "METASTATIC_PREDICTED_COMBINED",
    ),
]

for label_column, local_column, metastatic_column in label_column_sources:
    for (local_flag, metastatic_flag), label in combined_label_by_flags.items():
        matches = (df_test[local_column] == local_flag) & (
            df_test[metastatic_column] == metastatic_flag
        )
        df_test.loc[matches, label_column] = label


### ML predictions

In [None]:
# Confusion matrix for LOCAL: actual values as rows, ML predictions as columns
confusion_matrix = pd.crosstab(df_test["LOCAL"], df_test["LOCAL_PREDICTED_ML"])

# Draw it as an annotated heatmap
fig = plt.figure(figsize=(5, 5))
ax = sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")
ax.set_xlabel("Predicted localized disease allowed")
ax.set_ylabel("Actual localized disease allowed")
# Relabel the two tick positions (0.5 and 1.5) as No / Yes
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed after drawing (rather than never created) so the
# heatmap keeps the width it was laid out with
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/confusion_matrix_local_ML.png", dpi=300, bbox_inches="tight")

In [None]:
# Overall metrics of the ML prediction for LOCAL. Each 95 % interval is the
# normal approximation: estimate +/- 1.96 * sqrt(p * (1 - p) / n).
n_test_trials = df_test.shape[0]
actual_pos = df_test["LOCAL"] == 1
predicted_pos = df_test["LOCAL_PREDICTED_ML"] == 1
true_pos = np.sum(actual_pos & predicted_pos)
n_predicted_pos = np.sum(predicted_pos)
n_actual_pos = np.sum(actual_pos)

# Accuracy: share of trials where prediction and label agree
accuracy_local_ML = np.sum(df_test["LOCAL"] == df_test["LOCAL_PREDICTED_ML"]) / n_test_trials
accuracy_local_ML_std = np.sqrt(accuracy_local_ML * (1 - accuracy_local_ML) / n_test_trials)
accuracy_local_ML_CI = 1.96 * accuracy_local_ML_std
print(
    f"Accuracy of ML prediction for LOCAL: {accuracy_local_ML:.2f} (95% CI: {accuracy_local_ML - accuracy_local_ML_CI:.2f} - {accuracy_local_ML + accuracy_local_ML_CI:.2f})"
)

# Precision: true positives over predicted positives (n = predicted positives)
precision_local_ml = true_pos / n_predicted_pos
precision_local_ml_std = np.sqrt(precision_local_ml * (1 - precision_local_ml) / n_predicted_pos)
precision_local_ml_CI = 1.96 * precision_local_ml_std
print(f"Precision of ML prediction for LOCAL: {precision_local_ml:.2f} (95% CI: {precision_local_ml - precision_local_ml_CI:.2f} - {precision_local_ml + precision_local_ml_CI:.2f})")

# Recall: true positives over actual positives (n = actual positives)
recall_local_ml = true_pos / n_actual_pos
recall_local_ml_std = np.sqrt(recall_local_ml * (1 - recall_local_ml) / n_actual_pos)
recall_local_ml_CI = 1.96 * recall_local_ml_std
print(
    f"Recall of ML prediction for LOCAL: {recall_local_ml:.2f} (95% CI: {recall_local_ml - recall_local_ml_CI:.2f} - {recall_local_ml + recall_local_ml_CI:.2f})"
)

# F1: harmonic mean of precision and recall
f1_local_ml = 2 * precision_local_ml * recall_local_ml / (precision_local_ml + recall_local_ml)
# NOTE(review): F1 is not a proportion of n trials, so this binomial-style
# interval is only a rough heuristic -- confirm it is intended.
f1_local_ml_std = np.sqrt(f1_local_ml * (1 - f1_local_ml) / n_test_trials)
f1_local_ml_CI = 1.96 * f1_local_ml_std
print(f"F1 score of ML prediction for LOCAL: {f1_local_ml:.2f} (95% CI: {f1_local_ml - f1_local_ml_CI:.2f} - {f1_local_ml + f1_local_ml_CI:.2f})")



In [None]:
# Per-family metrics of the ML prediction for LOCAL
trials_by_family = df_test.groupby("family")


def _local_ml_true_positives(trials):
    """Count rows of `trials` where LOCAL is 1 and the ML prediction is 1."""
    return np.sum((trials["LOCAL"] == 1) & (trials["LOCAL_PREDICTED_ML"] == 1))


# Accuracy per family
accuracy_local_ml_per_family = trials_by_family.apply(
    lambda trials: np.sum(trials["LOCAL"] == trials["LOCAL_PREDICTED_ML"]) / trials.shape[0]
)

# Precision per family: true positives over predicted positives
precision_local_ml_per_family = trials_by_family.apply(
    lambda trials: _local_ml_true_positives(trials) / np.sum(trials["LOCAL_PREDICTED_ML"] == 1)
)

# Recall per family: true positives over actual positives
recall_local_ml_per_family = trials_by_family.apply(
    lambda trials: _local_ml_true_positives(trials) / np.sum(trials["LOCAL"] == 1)
)

# F1 per family, rounded to two decimals and displayed as the cell output
f1_local_ml_per_family = (
    2 * precision_local_ml_per_family * recall_local_ml_per_family
    / (precision_local_ml_per_family + recall_local_ml_per_family)
).round(2)
f1_local_ml_per_family


In [None]:
# Confusion matrix for METASTATIC: actual values as rows, ML predictions as columns
confusion_matrix = pd.crosstab(df_test["METASTATIC"], df_test["METASTATIC_PREDICTED_ML"])

# Draw it as an annotated heatmap
fig = plt.figure(figsize=(5, 5))
ax = sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")
ax.set_xlabel("Predicted metastatic disease allowed")
ax.set_ylabel("Actual metastatic disease allowed")
# Relabel the two tick positions (0.5 and 1.5) as No / Yes
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed after drawing (rather than never created) so the
# heatmap keeps the width it was laid out with
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/confusion_matrix_metastatic_ML.png", dpi=300, bbox_inches="tight")

In [None]:
# Overall metrics of the ML prediction for METASTATIC. Each 95 % interval is
# the normal approximation: estimate +/- 1.96 * sqrt(p * (1 - p) / n).
n_test_trials = df_test.shape[0]
actual_pos = df_test["METASTATIC"] == 1
predicted_pos = df_test["METASTATIC_PREDICTED_ML"] == 1
true_pos = np.sum(actual_pos & predicted_pos)
n_predicted_pos = np.sum(predicted_pos)
n_actual_pos = np.sum(actual_pos)

# Accuracy: share of trials where prediction and label agree
accuracy_metastatic_ml = np.sum(df_test["METASTATIC"] == df_test["METASTATIC_PREDICTED_ML"]) / n_test_trials
accuracy_metastatic_ml_std = np.sqrt(accuracy_metastatic_ml * (1 - accuracy_metastatic_ml) / n_test_trials)
accuracy_metastatic_ml_CI = 1.96 * accuracy_metastatic_ml_std
print(
    f"Accuracy of ML prediction for METASTATIC: {accuracy_metastatic_ml:.2f} (95% CI: {accuracy_metastatic_ml - accuracy_metastatic_ml_CI:.2f} - {accuracy_metastatic_ml + accuracy_metastatic_ml_CI:.2f})"
)

# Precision: true positives over predicted positives (n = predicted positives)
precision_metastatic_ml = true_pos / n_predicted_pos
precision_metastatic_ml_std = np.sqrt(precision_metastatic_ml * (1 - precision_metastatic_ml) / n_predicted_pos)
precision_metastatic_ml_CI = 1.96 * precision_metastatic_ml_std
print(f"Precision of ML prediction for METASTATIC: {precision_metastatic_ml:.2f} (95% CI: {precision_metastatic_ml - precision_metastatic_ml_CI:.2f} - {precision_metastatic_ml + precision_metastatic_ml_CI:.2f})")

# Recall: true positives over actual positives (n = actual positives)
recall_metastatic_ml = true_pos / n_actual_pos
recall_metastatic_ml_std = np.sqrt(recall_metastatic_ml * (1 - recall_metastatic_ml) / n_actual_pos)
recall_metastatic_ml_CI = 1.96 * recall_metastatic_ml_std
print(
    f"Recall of ML prediction for METASTATIC: {recall_metastatic_ml:.2f} (95% CI: {recall_metastatic_ml - recall_metastatic_ml_CI:.2f} - {recall_metastatic_ml + recall_metastatic_ml_CI:.2f})"
)

# F1: harmonic mean of precision and recall
f1_metastatic_ml = 2 * precision_metastatic_ml * recall_metastatic_ml / (precision_metastatic_ml + recall_metastatic_ml)
# NOTE(review): F1 is not a proportion of n trials, so this binomial-style
# interval is only a rough heuristic -- confirm it is intended.
f1_metastatic_ml_std = np.sqrt(f1_metastatic_ml * (1 - f1_metastatic_ml) / n_test_trials)
f1_metastatic_ml_CI = 1.96 * f1_metastatic_ml_std
print(f"F1 score of ML prediction for METASTATIC: {f1_metastatic_ml:.2f} (95% CI: {f1_metastatic_ml - f1_metastatic_ml_CI:.2f} - {f1_metastatic_ml + f1_metastatic_ml_CI:.2f})")

In [None]:
# Per-family metrics of the ML prediction for METASTATIC
trials_by_family = df_test.groupby("family")


def _metastatic_ml_true_positives(trials):
    """Count rows of `trials` where METASTATIC is 1 and the ML prediction is 1."""
    return np.sum((trials["METASTATIC"] == 1) & (trials["METASTATIC_PREDICTED_ML"] == 1))


# Accuracy per family
accuracy_metastatic_ml_per_family = trials_by_family.apply(
    lambda trials: np.sum(trials["METASTATIC"] == trials["METASTATIC_PREDICTED_ML"]) / trials.shape[0]
)

# Precision per family: true positives over predicted positives
precision_metastatic_ml_per_family = trials_by_family.apply(
    lambda trials: _metastatic_ml_true_positives(trials) / np.sum(trials["METASTATIC_PREDICTED_ML"] == 1)
)

# Recall per family: true positives over actual positives
recall_metastatic_ml_per_family = trials_by_family.apply(
    lambda trials: _metastatic_ml_true_positives(trials) / np.sum(trials["METASTATIC"] == 1)
)

# F1 per family, rounded to two decimals and displayed as the cell output
f1_metastatic_ml_per_family = (
    2 * precision_metastatic_ml_per_family * recall_metastatic_ml_per_family
    / (precision_metastatic_ml_per_family + recall_metastatic_ml_per_family)
).round(2)
f1_metastatic_ml_per_family

In [None]:
# Create the confusion matrix of predicted and actual values for combined labels
# (actual labels as rows, ML predictions as columns).
#
# The matrix is reindexed to the full, fixed list of labels on BOTH axes so
# that a class that never occurs (as a prediction or as a ground-truth label)
# shows up as a row/column of zeros. Without this, a missing class would shift
# the remaining cells under the wrong hard-coded tick labels below.
all_label_order = ["Both", "Local_only", "Metastatic_only", "Neither"]
confusion_matrix = pd.crosstab(
    df_test["ALL_LABELS"], df_test["ALL_LABELS_PREDICTIONS_ML"]
).reindex(index=all_label_order, columns=all_label_order, fill_value=0)

# Plot the confusion matrix
fig = plt.figure(figsize=(6, 6))
sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")

plt.xlabel("Predicted labels")
plt.ylabel("Actual labels")
# Tick labels are listed in the same order as all_label_order
plt.xticks([0.5, 1.5, 2.5, 3.5], ["Both", "Local only", "Metastatic only", "Neither"])
plt.yticks([0.5, 1.5, 2.5, 3.5], ["Both", "Local only", "Metastatic only", "Neither"])
# Remove the colorbar but keep the width of the heatmap
plt.gca().collections[0].colorbar.remove()
plt.show()

fig.savefig("plots/confusion_matrix_all_labels_ml.png", dpi=300, bbox_inches="tight")

### Regex predictions

In [None]:
# Confusion matrix for LOCAL: actual values as rows, regex predictions as columns
confusion_matrix = pd.crosstab(df_test["LOCAL"], df_test["LOCAL_PREDICTED_REGEX"])

# Draw it as an annotated heatmap
fig = plt.figure(figsize=(5, 5))
ax = sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")
ax.set_xlabel("Predicted localized disease allowed")
ax.set_ylabel("Actual localized disease allowed")
# Relabel the two tick positions (0.5 and 1.5) as No / Yes
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed after drawing (rather than never created) so the
# heatmap keeps the width it was laid out with
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/confusion_matrix_local_regex.png", dpi=300, bbox_inches="tight")

In [None]:
# Overall metrics of the regex prediction for LOCAL. Each 95 % interval is the
# normal approximation: estimate +/- 1.96 * sqrt(p * (1 - p) / n).
n_test_trials = df_test.shape[0]
actual_pos = df_test["LOCAL"] == 1
predicted_pos = df_test["LOCAL_PREDICTED_REGEX"] == 1
true_pos = np.sum(actual_pos & predicted_pos)
n_predicted_pos = np.sum(predicted_pos)
n_actual_pos = np.sum(actual_pos)

# Accuracy: share of trials where prediction and label agree
accuracy_local_regex = np.sum(df_test["LOCAL"] == df_test["LOCAL_PREDICTED_REGEX"]) / n_test_trials
accuracy_local_regex_std = np.sqrt(accuracy_local_regex * (1 - accuracy_local_regex) / n_test_trials)
accuracy_local_regex_CI = 1.96 * accuracy_local_regex_std
print(
    f"Accuracy of regex prediction for LOCAL: {accuracy_local_regex:.2f} (95% CI: {accuracy_local_regex - accuracy_local_regex_CI:.2f} - {accuracy_local_regex + accuracy_local_regex_CI:.2f})"
)

# Precision: true positives over predicted positives (n = predicted positives)
precision_local_regex = true_pos / n_predicted_pos
precision_local_regex_std = np.sqrt(precision_local_regex * (1 - precision_local_regex) / n_predicted_pos)
precision_local_regex_CI = 1.96 * precision_local_regex_std
print(f"Precision of regex prediction for LOCAL: {precision_local_regex:.2f} (95% CI: {precision_local_regex - precision_local_regex_CI:.2f} - {precision_local_regex + precision_local_regex_CI:.2f})")

# Recall: true positives over actual positives (n = actual positives)
recall_local_regex = true_pos / n_actual_pos
recall_local_regex_std = np.sqrt(recall_local_regex * (1 - recall_local_regex) / n_actual_pos)
recall_local_regex_CI = 1.96 * recall_local_regex_std
print(
    f"Recall of regex prediction for LOCAL: {recall_local_regex:.2f} (95% CI: {recall_local_regex - recall_local_regex_CI:.2f} - {recall_local_regex + recall_local_regex_CI:.2f})"
)

# F1: harmonic mean of precision and recall
f1_local_regex = 2 * precision_local_regex * recall_local_regex / (precision_local_regex + recall_local_regex)
# NOTE(review): F1 is not a proportion of n trials, so this binomial-style
# interval is only a rough heuristic -- confirm it is intended.
f1_local_regex_std = np.sqrt(f1_local_regex * (1 - f1_local_regex) / n_test_trials)
f1_local_regex_CI = 1.96 * f1_local_regex_std
print(f"F1 score of regex prediction for LOCAL: {f1_local_regex:.2f} (95% CI: {f1_local_regex - f1_local_regex_CI:.2f} - {f1_local_regex + f1_local_regex_CI:.2f})")

In [None]:
# Per-family metrics of the regex prediction for LOCAL
trials_by_family = df_test.groupby("family")


def _local_regex_true_positives(trials):
    """Count rows of `trials` where LOCAL is 1 and the regex prediction is 1."""
    return np.sum((trials["LOCAL"] == 1) & (trials["LOCAL_PREDICTED_REGEX"] == 1))


# Accuracy per family
accuracy_local_regex_per_family = trials_by_family.apply(
    lambda trials: np.sum(trials["LOCAL"] == trials["LOCAL_PREDICTED_REGEX"]) / trials.shape[0]
)

# Precision per family: true positives over predicted positives
precision_local_regex_per_family = trials_by_family.apply(
    lambda trials: _local_regex_true_positives(trials) / np.sum(trials["LOCAL_PREDICTED_REGEX"] == 1)
)

# Recall per family: true positives over actual positives
recall_local_regex_per_family = trials_by_family.apply(
    lambda trials: _local_regex_true_positives(trials) / np.sum(trials["LOCAL"] == 1)
)

# F1 per family, rounded to two decimals and displayed as the cell output
f1_local_regex_per_family = (
    2 * precision_local_regex_per_family * recall_local_regex_per_family
    / (precision_local_regex_per_family + recall_local_regex_per_family)
).round(2)
f1_local_regex_per_family

In [None]:
# Create confusion matrix of predicted and actual values for METASTATIC using regex
# (actual values as rows, regex predictions as columns)
confusion_matrix = pd.crosstab(df_test["METASTATIC"], df_test["METASTATIC_PREDICTED_REGEX"])

# Plot the confusion matrix
fig = plt.figure(figsize=(5, 5))
sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")
# Full axis label, matching the other METASTATIC confusion-matrix figure
# (this one previously read just "Predicted")
plt.xlabel("Predicted metastatic disease allowed")
plt.ylabel("Actual metastatic disease allowed")
plt.xticks([0.5, 1.5], ["No", "Yes"])
plt.yticks([0.5, 1.5], ["No", "Yes"])
# Remove the colorbar but keep the width of the heatmap
plt.gca().collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/confusion_matrix_metastatic_regex.png", dpi=300, bbox_inches="tight")

In [None]:
# Overall metrics of the regex prediction for METASTATIC. Each 95 % interval
# is the normal approximation: estimate +/- 1.96 * sqrt(p * (1 - p) / n).
n_test_trials = df_test.shape[0]
actual_pos = df_test["METASTATIC"] == 1
predicted_pos = df_test["METASTATIC_PREDICTED_REGEX"] == 1
true_pos = np.sum(actual_pos & predicted_pos)
n_predicted_pos = np.sum(predicted_pos)
n_actual_pos = np.sum(actual_pos)

# Accuracy: share of trials where prediction and label agree
accuracy_metastatic_regex = np.sum(df_test["METASTATIC"] == df_test["METASTATIC_PREDICTED_REGEX"]) / n_test_trials
accuracy_metastatic_regex_std = np.sqrt(accuracy_metastatic_regex * (1 - accuracy_metastatic_regex) / n_test_trials)
accuracy_metastatic_regex_CI = 1.96 * accuracy_metastatic_regex_std
print(
    f"Accuracy of regex prediction for METASTATIC: {accuracy_metastatic_regex:.2f} (95% CI: {accuracy_metastatic_regex - accuracy_metastatic_regex_CI:.2f} - {accuracy_metastatic_regex + accuracy_metastatic_regex_CI:.2f})"
)

# Precision: true positives over predicted positives (n = predicted positives)
precision_metastatic_regex = true_pos / n_predicted_pos
precision_metastatic_regex_std = np.sqrt(precision_metastatic_regex * (1 - precision_metastatic_regex) / n_predicted_pos)
precision_metastatic_regex_CI = 1.96 * precision_metastatic_regex_std
print(f"Precision of regex prediction for METASTATIC: {precision_metastatic_regex:.2f} (95% CI: {precision_metastatic_regex - precision_metastatic_regex_CI:.2f} - {precision_metastatic_regex + precision_metastatic_regex_CI:.2f})")

# Recall: true positives over actual positives (n = actual positives)
recall_metastatic_regex = true_pos / n_actual_pos
recall_metastatic_regex_std = np.sqrt(recall_metastatic_regex * (1 - recall_metastatic_regex) / n_actual_pos)
recall_metastatic_regex_CI = 1.96 * recall_metastatic_regex_std
print(
    f"Recall of regex prediction for METASTATIC: {recall_metastatic_regex:.2f} (95% CI: {recall_metastatic_regex - recall_metastatic_regex_CI:.2f} - {recall_metastatic_regex + recall_metastatic_regex_CI:.2f})"
)

# F1: harmonic mean of precision and recall
f1_metastatic_regex = 2 * precision_metastatic_regex * recall_metastatic_regex / (precision_metastatic_regex + recall_metastatic_regex)
# NOTE(review): F1 is not a proportion of n trials, so this binomial-style
# interval is only a rough heuristic -- confirm it is intended.
f1_metastatic_regex_std = np.sqrt(f1_metastatic_regex * (1 - f1_metastatic_regex) / n_test_trials)
f1_metastatic_regex_CI = 1.96 * f1_metastatic_regex_std
print(f"F1 score of regex prediction for METASTATIC: {f1_metastatic_regex:.2f} (95% CI: {f1_metastatic_regex - f1_metastatic_regex_CI:.2f} - {f1_metastatic_regex + f1_metastatic_regex_CI:.2f})")


In [None]:
# Per-family metrics of the regex prediction for METASTATIC
trials_by_family = df_test.groupby("family")


def _metastatic_regex_true_positives(trials):
    """Count rows of `trials` where METASTATIC is 1 and the regex prediction is 1."""
    return np.sum((trials["METASTATIC"] == 1) & (trials["METASTATIC_PREDICTED_REGEX"] == 1))


# Accuracy per family
accuracy_metastatic_regex_per_family = trials_by_family.apply(
    lambda trials: np.sum(trials["METASTATIC"] == trials["METASTATIC_PREDICTED_REGEX"]) / trials.shape[0]
)

# Precision per family: true positives over predicted positives
precision_metastatic_regex_per_family = trials_by_family.apply(
    lambda trials: _metastatic_regex_true_positives(trials) / np.sum(trials["METASTATIC_PREDICTED_REGEX"] == 1)
)

# Recall per family: true positives over actual positives
recall_metastatic_regex_per_family = trials_by_family.apply(
    lambda trials: _metastatic_regex_true_positives(trials) / np.sum(trials["METASTATIC"] == 1)
)

# F1 per family, rounded to two decimals and displayed as the cell output
f1_metastatic_regex_per_family = (
    2 * precision_metastatic_regex_per_family * recall_metastatic_regex_per_family
    / (precision_metastatic_regex_per_family + recall_metastatic_regex_per_family)
).round(2)
f1_metastatic_regex_per_family

In [None]:
# Create the confusion matrix of predicted and actual values for combined labels
# (actual labels as rows, regex predictions as columns).
#
# The matrix is reindexed to the full, fixed list of labels on BOTH axes so
# that a class that never occurs (as a prediction or as a ground-truth label)
# shows up as a row/column of zeros. Without this, a missing class would shift
# the remaining cells under the wrong hard-coded tick labels below.
all_label_order = ["Both", "Local_only", "Metastatic_only", "Neither"]
confusion_matrix = pd.crosstab(
    df_test["ALL_LABELS"], df_test["ALL_LABELS_PREDICTIONS_REGEX"]
).reindex(index=all_label_order, columns=all_label_order, fill_value=0)

# Plot the confusion matrix
fig = plt.figure(figsize=(6, 6))
sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")
plt.xlabel("Predicted labels")
plt.ylabel("Actual labels")
# Tick labels are listed in the same order as all_label_order
plt.xticks([0.5, 1.5, 2.5, 3.5], ["Both", "Local only", "Metastatic only", "Neither"])
plt.yticks([0.5, 1.5, 2.5, 3.5], ["Both", "Local only", "Metastatic only", "Neither"])
# Remove the colorbar but keep the width of the heatmap
plt.gca().collections[0].colorbar.remove()
plt.show()

fig.savefig("plots/confusion_matrix_all_labels_regex.png", dpi=300, bbox_inches="tight")

### Combined predictions

In [None]:
# Confusion matrix for LOCAL: actual values as rows, combined (ML + regex) predictions as columns
confusion_matrix = pd.crosstab(df_test["LOCAL"], df_test["LOCAL_PREDICTED_COMBINED"])

# Draw it as an annotated heatmap
fig = plt.figure(figsize=(5, 5))
ax = sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d")
ax.set_xlabel("Predicted localized disease allowed")
ax.set_ylabel("Actual localized disease allowed")
# Relabel the two tick positions (0.5 and 1.5) as No / Yes
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed after drawing (rather than never created) so the
# heatmap keeps the width it was laid out with
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/confusion_matrix_local_combined.png", dpi=300, bbox_inches="tight")

In [None]:
# Count once what the metrics below share: test-set size, predicted positives,
# actual positives and true positives for LOCAL under the combined method
local_combined_n_total = df_test.shape[0]
local_combined_n_predicted = np.sum(df_test["LOCAL_PREDICTED_COMBINED"] == 1)
local_combined_n_actual = np.sum(df_test["LOCAL"] == 1)
local_combined_n_true_positive = np.sum(
    (df_test["LOCAL"] == 1) & (df_test["LOCAL_PREDICTED_COMBINED"] == 1)
)

# Calculate the accuracy of the combined prediction for LOCAL
accuracy_local_combined = (
    np.sum(df_test["LOCAL"] == df_test["LOCAL_PREDICTED_COMBINED"])
    / local_combined_n_total
)

# Estimate the 95 % confidence interval for the accuracy with the normal
# approximation: half-width = 1.96 * sqrt(p * (1 - p) / n)
accuracy_local_combined_std = np.sqrt(
    accuracy_local_combined * (1 - accuracy_local_combined) / local_combined_n_total
)
accuracy_local_combined_CI = 1.96 * accuracy_local_combined_std
# p +/- half-width can fall outside [0, 1] when p is close to 0 or 1, so the
# printed bounds are clipped to the valid range (NaN is passed through)
print(
    f"Accuracy of combined prediction for LOCAL: {accuracy_local_combined:.2f} (95% CI: {np.clip(accuracy_local_combined - accuracy_local_combined_CI, 0, 1):.2f} - {np.clip(accuracy_local_combined + accuracy_local_combined_CI, 0, 1):.2f})"
)

# Calculate the precision of the combined prediction for LOCAL
precision_local_combined = local_combined_n_true_positive / local_combined_n_predicted

# Estimate the 95 % confidence interval for the precision
# (n = number of predicted positives)
precision_local_combined_std = np.sqrt(
    precision_local_combined * (1 - precision_local_combined) / local_combined_n_predicted
)
precision_local_combined_CI = 1.96 * precision_local_combined_std
print(
    f"Precision of combined prediction for LOCAL: {precision_local_combined:.2f} (95% CI: {np.clip(precision_local_combined - precision_local_combined_CI, 0, 1):.2f} - {np.clip(precision_local_combined + precision_local_combined_CI, 0, 1):.2f})"
)

# Calculate the recall of the combined prediction for LOCAL
recall_local_combined = local_combined_n_true_positive / local_combined_n_actual

# Estimate the 95 % confidence interval for the recall
# (n = number of actual positives)
recall_local_combined_std = np.sqrt(
    recall_local_combined * (1 - recall_local_combined) / local_combined_n_actual
)
recall_local_combined_CI = 1.96 * recall_local_combined_std
print(
    f"Recall of combined prediction for LOCAL: {recall_local_combined:.2f} (95% CI: {np.clip(recall_local_combined - recall_local_combined_CI, 0, 1):.2f} - {np.clip(recall_local_combined + recall_local_combined_CI, 0, 1):.2f})"
)

# Calculate the F1 score of the combined prediction for LOCAL
f1_local_combined = 2 * precision_local_combined * recall_local_combined / (precision_local_combined + recall_local_combined)

# Estimate the 95 % confidence interval for the F1 score
# NOTE(review): this reuses the binomial formula with the full test-set size,
# although F1 is not a proportion over n independent trials - the interval is
# only a rough indication; consider a bootstrap estimate (TODO confirm)
f1_local_combined_std = np.sqrt(
    f1_local_combined * (1 - f1_local_combined) / local_combined_n_total
)
f1_local_combined_CI = 1.96 * f1_local_combined_std
print(
    f"F1 score of combined prediction for LOCAL: {f1_local_combined:.2f} (95% CI: {np.clip(f1_local_combined - f1_local_combined_CI, 0, 1):.2f} - {np.clip(f1_local_combined + f1_local_combined_CI, 0, 1):.2f})"
)

In [None]:
# Group only the two columns the metrics read. Selecting them explicitly keeps
# the grouping column "family" out of the frames handed to apply(), which newer
# pandas versions deprecate for groupby().apply()
local_combined_by_family = df_test.groupby("family")[
    ["LOCAL", "LOCAL_PREDICTED_COMBINED"]
]

# Calculate the accuracy per family of the combined prediction for LOCAL
accuracy_local_combined_per_family = local_combined_by_family.apply(
    lambda x: np.sum(x["LOCAL"] == x["LOCAL_PREDICTED_COMBINED"]) / x.shape[0]
)

# Calculate the precision per family of the combined prediction for LOCAL
# (NaN for a family without any predicted positive, where precision is undefined)
precision_local_combined_per_family = local_combined_by_family.apply(
    lambda x: np.sum((x["LOCAL"] == 1) & (x["LOCAL_PREDICTED_COMBINED"] == 1)) / np.sum(x["LOCAL_PREDICTED_COMBINED"] == 1)
)

# Calculate the recall per family of the combined prediction for LOCAL
# (NaN for a family without any actual positive, where recall is undefined)
recall_local_combined_per_family = local_combined_by_family.apply(
    lambda x: np.sum((x["LOCAL"] == 1) & (x["LOCAL_PREDICTED_COMBINED"] == 1)) / np.sum(x["LOCAL"] == 1)
)

# Calculate the F1 score per family of the combined prediction for LOCAL
# directly from the counts: F1 = 2 * TP / (predicted positives + actual positives).
# This equals 2 * precision * recall / (precision + recall) wherever that
# expression is defined, but yields 0 instead of NaN (0 / 0) for a family that
# has positives yet not a single true positive. It stays NaN only when a family
# has neither actual nor predicted positives.
f1_local_combined_per_family = local_combined_by_family.apply(
    lambda x: 2 * np.sum((x["LOCAL"] == 1) & (x["LOCAL_PREDICTED_COMBINED"] == 1))
    / (np.sum(x["LOCAL_PREDICTED_COMBINED"] == 1) + np.sum(x["LOCAL"] == 1))
)
# Round the F1 values to two decimal places
f1_local_combined_per_family = f1_local_combined_per_family.round(2)
f1_local_combined_per_family

In [None]:
# Cross-tabulate the actual METASTATIC labels (rows) against the predictions of
# the combined method (columns)
confusion_matrix = pd.crosstab(
    index=df_test["METASTATIC"],
    columns=df_test["METASTATIC_PREDICTED_COMBINED"],
)

# Draw the matrix as an annotated heatmap on an explicit figure / axes pair
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d", ax=ax)
ax.set_xlabel("Predicted metastatic disease allowed")
ax.set_ylabel("Actual metastatic disease allowed")
# Place "No" / "Yes" at the centres of the two cells along each axis
ax.set_xticks([0.5, 1.5])
ax.set_xticklabels(["No", "Yes"])
ax.set_yticks([0.5, 1.5])
ax.set_yticklabels(["No", "Yes"])
# The colorbar is removed only after drawing, so the heatmap keeps the width it
# was given next to the colorbar
ax.collections[0].colorbar.remove()
plt.show()
fig.savefig("plots/confusion_matrix_metastatic_combined.png", dpi=300, bbox_inches="tight")


In [None]:
# Count once what the metrics below share: test-set size, predicted positives,
# actual positives and true positives for METASTATIC under the combined method
metastatic_combined_n_total = df_test.shape[0]
metastatic_combined_n_predicted = np.sum(df_test["METASTATIC_PREDICTED_COMBINED"] == 1)
metastatic_combined_n_actual = np.sum(df_test["METASTATIC"] == 1)
metastatic_combined_n_true_positive = np.sum(
    (df_test["METASTATIC"] == 1) & (df_test["METASTATIC_PREDICTED_COMBINED"] == 1)
)

# Calculate the accuracy of the combined prediction for METASTATIC
accuracy_metastatic_combined = (
    np.sum(df_test["METASTATIC"] == df_test["METASTATIC_PREDICTED_COMBINED"])
    / metastatic_combined_n_total
)

# Estimate the 95 % confidence interval for the accuracy with the normal
# approximation: half-width = 1.96 * sqrt(p * (1 - p) / n)
accuracy_metastatic_combined_std = np.sqrt(
    accuracy_metastatic_combined * (1 - accuracy_metastatic_combined) / metastatic_combined_n_total
)
accuracy_metastatic_combined_CI = 1.96 * accuracy_metastatic_combined_std
# p +/- half-width can fall outside [0, 1] when p is close to 0 or 1, so the
# printed bounds are clipped to the valid range (NaN is passed through)
print(
    f"Accuracy of combined prediction for METASTATIC: {accuracy_metastatic_combined:.2f} (95% CI: {np.clip(accuracy_metastatic_combined - accuracy_metastatic_combined_CI, 0, 1):.2f} - {np.clip(accuracy_metastatic_combined + accuracy_metastatic_combined_CI, 0, 1):.2f})"
)

# Calculate the precision of the combined prediction for METASTATIC
precision_metastatic_combined = metastatic_combined_n_true_positive / metastatic_combined_n_predicted

# Estimate the 95 % confidence interval for the precision
# (n = number of predicted positives)
precision_metastatic_combined_std = np.sqrt(
    precision_metastatic_combined * (1 - precision_metastatic_combined) / metastatic_combined_n_predicted
)
precision_metastatic_combined_CI = 1.96 * precision_metastatic_combined_std
print(
    f"Precision of combined prediction for METASTATIC: {precision_metastatic_combined:.2f} (95% CI: {np.clip(precision_metastatic_combined - precision_metastatic_combined_CI, 0, 1):.2f} - {np.clip(precision_metastatic_combined + precision_metastatic_combined_CI, 0, 1):.2f})"
)

# Calculate the recall of the combined prediction for METASTATIC
recall_metastatic_combined = metastatic_combined_n_true_positive / metastatic_combined_n_actual

# Estimate the 95 % confidence interval for the recall
# (n = number of actual positives)
recall_metastatic_combined_std = np.sqrt(
    recall_metastatic_combined * (1 - recall_metastatic_combined) / metastatic_combined_n_actual
)
recall_metastatic_combined_CI = 1.96 * recall_metastatic_combined_std
print(
    f"Recall of combined prediction for METASTATIC: {recall_metastatic_combined:.2f} (95% CI: {np.clip(recall_metastatic_combined - recall_metastatic_combined_CI, 0, 1):.2f} - {np.clip(recall_metastatic_combined + recall_metastatic_combined_CI, 0, 1):.2f})"
)

# Calculate the F1 score of the combined prediction for METASTATIC
f1_metastatic_combined = 2 * precision_metastatic_combined * recall_metastatic_combined / (precision_metastatic_combined + recall_metastatic_combined)

# Estimate the 95 % confidence interval for the F1 score
# NOTE(review): this reuses the binomial formula with the full test-set size,
# although F1 is not a proportion over n independent trials - the interval is
# only a rough indication; consider a bootstrap estimate (TODO confirm)
f1_metastatic_combined_std = np.sqrt(
    f1_metastatic_combined * (1 - f1_metastatic_combined) / metastatic_combined_n_total
)
f1_metastatic_combined_CI = 1.96 * f1_metastatic_combined_std
print(
    f"F1 score of combined prediction for METASTATIC: {f1_metastatic_combined:.2f} (95% CI: {np.clip(f1_metastatic_combined - f1_metastatic_combined_CI, 0, 1):.2f} - {np.clip(f1_metastatic_combined + f1_metastatic_combined_CI, 0, 1):.2f})"
)

In [None]:
# Group only the two columns the metrics read. Selecting them explicitly keeps
# the grouping column "family" out of the frames handed to apply(), which newer
# pandas versions deprecate for groupby().apply()
metastatic_combined_by_family = df_test.groupby("family")[
    ["METASTATIC", "METASTATIC_PREDICTED_COMBINED"]
]

# Calculate the accuracy per family of the combined prediction for METASTATIC
accuracy_metastatic_combined_per_family = metastatic_combined_by_family.apply(
    lambda x: np.sum(x["METASTATIC"] == x["METASTATIC_PREDICTED_COMBINED"]) / x.shape[0]
)

# Calculate the precision per family of the combined prediction for METASTATIC
# (NaN for a family without any predicted positive, where precision is undefined)
precision_metastatic_combined_per_family = metastatic_combined_by_family.apply(
    lambda x: np.sum((x["METASTATIC"] == 1) & (x["METASTATIC_PREDICTED_COMBINED"] == 1)) / np.sum(x["METASTATIC_PREDICTED_COMBINED"] == 1)
)

# Calculate the recall per family of the combined prediction for METASTATIC
# (NaN for a family without any actual positive, where recall is undefined)
recall_metastatic_combined_per_family = metastatic_combined_by_family.apply(
    lambda x: np.sum((x["METASTATIC"] == 1) & (x["METASTATIC_PREDICTED_COMBINED"] == 1)) / np.sum(x["METASTATIC"] == 1)
)

# Calculate the F1 score per family of the combined prediction for METASTATIC
# directly from the counts: F1 = 2 * TP / (predicted positives + actual positives).
# This equals 2 * precision * recall / (precision + recall) wherever that
# expression is defined, but yields 0 instead of NaN (0 / 0) for a family that
# has positives yet not a single true positive. It stays NaN only when a family
# has neither actual nor predicted positives.
f1_metastatic_combined_per_family = metastatic_combined_by_family.apply(
    lambda x: 2 * np.sum((x["METASTATIC"] == 1) & (x["METASTATIC_PREDICTED_COMBINED"] == 1))
    / (np.sum(x["METASTATIC_PREDICTED_COMBINED"] == 1) + np.sum(x["METASTATIC"] == 1))
)
# Round the F1 values to two decimal places
f1_metastatic_combined_per_family = f1_metastatic_combined_per_family.round(2)
f1_metastatic_combined_per_family

In [None]:
# Cross-tabulate the actual combined labels (rows) against the labels predicted
# by the combined method (columns)
confusion_matrix = pd.crosstab(
    index=df_test["ALL_LABELS"],
    columns=df_test["ALL_LABELS_PREDICTIONS_COMBINED"],
)

# When no column is called "Neither", append one filled with zeros
if "Neither" not in confusion_matrix:
    confusion_matrix["Neither"] = 0

# Draw the matrix as an annotated heatmap on an explicit figure / axes pair
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(confusion_matrix, annot=True, cmap="Blues", fmt="d", ax=ax)
ax.set_xlabel("Predicted labels")
ax.set_ylabel("Actual labels")
# Put the four label names at the cell centres along both axes
ax.set_xticks([0.5, 1.5, 2.5, 3.5])
ax.set_xticklabels(["Both", "Local only", "Metastatic only", "Neither"])
ax.set_yticks([0.5, 1.5, 2.5, 3.5])
ax.set_yticklabels(["Both", "Local only", "Metastatic only", "Neither"])
# The colorbar is removed only after drawing, so the heatmap keeps the width it
# was given next to the colorbar
ax.collections[0].colorbar.remove()
plt.show()

fig.savefig("plots/confusion_matrix_all_labels_combined.png", dpi=300, bbox_inches="tight")