In [2]:
import json
import warnings

import numpy as np
import pandas as pd
import shap
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

from evaluation import calculate_metrics, iterative_feature_deletion_scores, iterative_feature_addition_scores, evaluate_global_shap_scores

warnings.filterwarnings("ignore")



  from .autonotebook import tqdm as notebook_tqdm
DEBUG:matplotlib:matplotlib data path: c:\Users\snorl\Desktop\FYP\venv\Lib\site-packages\matplotlib\mpl-data
DEBUG:matplotlib:CONFIGDIR=C:\Users\snorl\.matplotlib
DEBUG:matplotlib:interactive is False
DEBUG:matplotlib:platform is win32
DEBUG:matplotlib:CACHEDIR=C:\Users\snorl\.matplotlib
DEBUG:matplotlib.font_manager:Using fontManager instance from C:\Users\snorl\.matplotlib\fontlist-v390.json


#### Try Oversampling

In [5]:
# SMOTE-oversampled Random Forest baseline for colorectal cancer prediction.
# Steps: load the cohort, hold out a stratified 20% test split, oversample the
# training split with SMOTE, fit the forest, then report hold-out metrics,
# feature importances and a 5-fold cross-validated ROC AUC.
data_path = '../../../dataset/colorectalcancers_schs_pgs.xlsx'
df = pd.read_excel(data_path)
# 'Follow-up time' and 'SERNO' are excluded from the features; rows containing
# any missing value are discarded.
df = df.drop(columns=['Follow-up time', 'SERNO'])
df = df.dropna()

X = df.drop(columns=['colorectal cancer'])
y = df['colorectal cancer']

# Stratify so the test split keeps the original class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The scaler is fitted on the training split only and reused for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Only the training split is oversampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# NOTE(review): with SMOTE's default strategy the resampled classes should be
# equally sized, so class_weight='balanced' is presumably a no-op here - confirm.
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)

rf.fit(X_train_resampled, y_train_resampled)

y_pred = rf.predict(X_test_scaled)
y_pred_proba = rf.predict_proba(X_test_scaled)[:, 1]

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nROC AUC Score:", roc_auc_score(y_test, y_pred_proba))

feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': rf.feature_importances_
})
feature_importance = feature_importance.sort_values('importance', ascending=False)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10))

# Cross-validate on the ORIGINAL training split and resample inside each fold.
# Running CV on the already-oversampled data would let synthetic minority
# points derived from a validation fold end up in the training folds, which
# inflates the score. The imblearn Pipeline applies the sampler during fit only,
# so every validation fold keeps its real class distribution.
cv_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42)),
    # Fresh, unfitted forest with the same hyper-parameters; the fitted `rf`
    # above is left untouched.
    ('rf', RandomForestClassifier(**rf.get_params())),
])
cv_scores = cross_val_score(
    cv_pipeline, X_train, y_train,
    cv=5, scoring='roc_auc'
)
print("\nCross-validation ROC AUC scores:", cv_scores)
print("Mean CV Score:", cv_scores.mean())
print("CV Score Std:", cv_scores.std())


Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98      4031
           1       0.02      0.01      0.01       107

    accuracy                           0.96      4138
   macro avg       0.50      0.50      0.50      4138
weighted avg       0.95      0.96      0.96      4138


Confusion Matrix:
[[3983   48]
 [ 106    1]]

ROC AUC Score: 0.5878414252162563

Top 10 Most Important Features:
                            feature  importance
8                              aMED    0.166488
9                              DASH    0.076162
1                     Age_interview    0.074619
17                      z_pgs000734    0.063451
10                              SBP    0.059710
2                               PC1    0.056558
13  smoke_never(0)_ex(1)_current(2)    0.055306
11                              DBP    0.054126
16                      z_pgs000055    0.053842
4                               PC3    0.051924

Cross-

#### Try Downsampling

In [7]:
import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Random-undersampled Random Forest baseline for colorectal cancer prediction.
# Steps: load the cohort, hold out a stratified 20% test split, undersample the
# training split, fit the forest, then report hold-out metrics, feature
# importances and a 5-fold cross-validated ROC AUC.
data_path = '../../../dataset/colorectalcancers_schs_pgs.xlsx'
df = pd.read_excel(data_path)

# 'Follow-up time' and 'SERNO' are excluded from the features; rows containing
# any missing value are discarded.
df = df.drop(columns=['Follow-up time', 'SERNO'])
df = df.dropna()

X = df.drop(columns=['colorectal cancer'])
y = df['colorectal cancer']

print("Original class distribution:")
print(pd.Series(y).value_counts())

# Stratify so the test split keeps the original class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The scaler is fitted on the training split only and reused for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Only the training split is undersampled; the test split is left untouched.
rus = RandomUnderSampler(random_state=42)
X_train_resampled, y_train_resampled = rus.fit_resample(X_train_scaled, y_train)

print("\nResampled class distribution:")
print(pd.Series(y_train_resampled).value_counts())

rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train_resampled, y_train_resampled)

y_pred = rf.predict(X_test_scaled)
y_pred_proba = rf.predict_proba(X_test_scaled)[:, 1]

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nROC AUC Score:", roc_auc_score(y_test, y_pred_proba))

feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': rf.feature_importances_
})
feature_importance = feature_importance.sort_values('importance', ascending=False)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10))

# Cross-validate on the ORIGINAL training split with scaling and undersampling
# performed inside each fold. Scoring on the pre-undersampled data would
# validate on artificially balanced folds and use a scaler that has already
# seen the validation rows. The imblearn Pipeline applies the sampler during
# fit only, so every validation fold keeps its real class distribution.
cv_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('undersample', RandomUnderSampler(random_state=42)),
    # Fresh, unfitted forest with the same hyper-parameters; the fitted `rf`
    # above is left untouched.
    ('rf', RandomForestClassifier(**rf.get_params())),
])
cv_scores = cross_val_score(
    cv_pipeline, X_train, y_train,
    cv=5, scoring='roc_auc'
)
print("\nCross-validation ROC AUC scores:", cv_scores)
print("Mean CV Score:", cv_scores.mean())
print("CV Score Std:", cv_scores.std())

Original class distribution:
colorectal cancer
0    20156
1      533
Name: count, dtype: int64

Resampled class distribution:
colorectal cancer
0    426
1    426
Name: count, dtype: int64

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.58      0.73      4031
           1       0.04      0.60      0.07       107

    accuracy                           0.58      4138
   macro avg       0.51      0.59      0.40      4138
weighted avg       0.96      0.58      0.71      4138


Confusion Matrix:
[[2340 1691]
 [  43   64]]

ROC AUC Score: 0.606750951156574

Top 10 Most Important Features:
          feature  importance
1   Age_interview    0.132197
16    z_pgs000055    0.102974
17    z_pgs000734    0.096723
3             PC2    0.079584
4             PC3    0.071417
10            SBP    0.067636
2             PC1    0.067485
5             BMI    0.064282
7   aHEI2010score    0.063170
11            DBP    0.062310

Cross-validation 

#### Output Probabilities

In [9]:
# Export the model's class probabilities for every row of the feature frame.
# Relies on `rf`, `scaler` and `X` left in the kernel by the preceding cell.
output_path = 'colorectal_cancer_predicted_probabilities.xlsx'

# `rf` was fitted on features transformed by `scaler`, so the raw feature frame
# must go through the same fitted scaler before prediction; feeding unscaled
# values would produce meaningless probabilities.
# NOTE(review): X also contains the rows `rf` was trained on, so part of this
# export is in-sample.
X_scaled = scaler.transform(X)
predicted_probabilities = rf.predict_proba(X_scaled)

# One column per class, named Prob_Class_0, Prob_Class_1, ...
prob_columns = [f'Prob_Class_{i}' for i in range(predicted_probabilities.shape[1])]
prob_df = pd.DataFrame(predicted_probabilities, columns=prob_columns)

# Reset the feature index so rows align positionally with the probabilities.
result_df = pd.concat([X.reset_index(drop=True), prob_df], axis=1)

result_df.to_excel(output_path, index=False)

# Report the file that was actually written (the old message named another file).
print(f"Predicted probabilities exported successfully to '{output_path}'.")


Predicted probabilities exported successfully to 'colorectal_cancer_probabilities.xlsx'.


### Colorectal Cancer Kernel SHAP Experiments

In [8]:
# Kernel SHAP experiment: for several train/test split seeds, fit a Random
# Forest on the undersampled cohort, explain the test split with
# shap.KernelExplainer and collect the deletion/insertion scores returned by
# evaluate_global_shap_scores.
data_path = '../../../dataset/colorectalcancers_schs_pgs.xlsx'
df = pd.read_excel(data_path)

df = df.drop(columns=['Follow-up time', 'SERNO'])
df = df.dropna()

X = df.drop(columns=['colorectal cancer'])
y = df['colorectal cancer']

score_methods = ('deletion', 'insertion')
score_metrics = ('auroc', 'cross_entropy', 'brier')

# all_scores[method][metric] accumulates one averaged score per seed.
all_scores = {
    method: {metric: [] for metric in score_metrics}
    for method in score_methods
}

# The undersampler uses a fixed random_state, so its output is identical for
# every seed; compute it once instead of once per iteration.
# NOTE(review): resampling happens before the split, so the test split is
# class-balanced as well - confirm this is the intended evaluation setting.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

seeds = [42, 123, 456, 789, 1010]
for seed in seeds:
    print("Training Random Forest model...")

    # Only the split changes between runs; the forest's own seed stays at 42.
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=seed
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Wrap the scaled arrays back into DataFrames to keep column names and index.
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1
    )
    rf.fit(X_train_scaled, y_train)

    # Summarise the training data with shap.kmeans (k=100) and use it as the
    # KernelExplainer background.
    background_data = shap.kmeans(X_train_scaled, 100)
    explainer = shap.KernelExplainer(rf.predict_proba, background_data)
    shap_values = explainer.shap_values(X_test_scaled)

    result = evaluate_global_shap_scores(rf, X_test_scaled, y_test, shap_values)

    for method in score_methods:
        for metric in score_metrics:
            all_scores[method][metric].append(result[method]["average_scores"][metric])

    print(json.dumps(result, indent=4))





Training Random Forest model...


  0%|          | 0/214 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.30783758, 0.16353872, 0.1162942 , 0.09345069, 0.08051137,
       0.07268387, 0.06796414, 0.06541549, 0.03230394])
DEBUG:shap:num_subset_sizes = 9
DEBUG:shap:num_paired_subset_sizes = 8
DEBUG:shap:self.M = 18
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(36.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 641.5335194823491
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 17.820375541176364
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(306.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 483.8853995042659
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.581324834981261
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(1632.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 383.23005660720037
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.23482233860735316
INFO:shap:num_full_subsets = 2
DEBUG:shap

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6302631578947369,
                0.606140350877193,
                0.5752631578947368,
                0.5406140350877193,
                0.5599122807017544,
                0.5659649122807018,
                0.5978947368421053,
                0.6121052631578948,
                0.5906140350877193,
                0.5849122807017544,
                0.5724122807017544,
                0.5584210526315789,
                0.5499122807017544,
                0.523640350877193,
                0.5343421052631578,
                0.5282017543859648,
                0.5099122807017544,
                0.5
            ],
            "cross_entropy": [
                0.6670223646658081,
                0.6770849565208431,
                0.6850209047216567,
                0.6982962530365295,
                0.6958150450509709,
                0.6903248135832902,
                0.681261237311977,


  0%|          | 0/214 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.30783758, 0.16353872, 0.1162942 , 0.09345069, 0.08051137,
       0.07268387, 0.06796414, 0.06541549, 0.03230394])
DEBUG:shap:num_subset_sizes = 9
DEBUG:shap:num_paired_subset_sizes = 8
DEBUG:shap:self.M = 18
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(36.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 641.5335194823491
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 17.820375541176364
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(306.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 483.8853995042659
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.581324834981261
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(1632.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 383.23005660720037
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.23482233860735316
INFO:shap:num_full_subsets = 2
DEBUG:shap

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.5667365478686234,
                0.5489168413696716,
                0.5462089447938505,
                0.5564290705800141,
                0.5268169112508735,
                0.5341544374563243,
                0.5566911250873515,
                0.551974143955276,
                0.5702306079664571,
                0.613382250174703,
                0.5910639412997903,
                0.5750349406009783,
                0.5518431167016074,
                0.5045859538784068,
                0.46339972047519223,
                0.47418763102725364,
                0.48226764500349406,
                0.5
            ],
            "cross_entropy": [
                0.7049264917531135,
                0.6961714281118482,
                0.6959773069514985,
                0.6915844636190166,
                0.6997455390309603,
                0.6968987297453553,
                0.68928251895062

  0%|          | 0/214 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.30783758, 0.16353872, 0.1162942 , 0.09345069, 0.08051137,
       0.07268387, 0.06796414, 0.06541549, 0.03230394])
DEBUG:shap:num_subset_sizes = 9
DEBUG:shap:num_paired_subset_sizes = 8
DEBUG:shap:self.M = 18
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(36.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 641.5335194823491
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 17.820375541176364
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(306.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 483.8853995042659
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.581324834981261
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(1632.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 383.23005660720037
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.23482233860735316
INFO:shap:num_full_subsets = 2
DEBUG:shap

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.57899366643209,
                0.5702850105559465,
                0.5151301900070373,
                0.4733462350457424,
                0.495249824067558,
                0.4868930330752991,
                0.49859254046446166,
                0.4878606615059817,
                0.4791520056298381,
                0.4802076002814919,
                0.502990851513019,
                0.5059817030260381,
                0.479371921182266,
                0.42694405348346237,
                0.5203201970443349,
                0.517637227304715,
                0.5224313863476425,
                0.5
            ],
            "cross_entropy": [
                0.6778239856458068,
                0.6945173729489057,
                0.7076193544432913,
                0.7178952266221212,
                0.7104895300197419,
                0.7117006314721035,
                0.7103001795641579,
 

  0%|          | 0/214 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.30783758, 0.16353872, 0.1162942 , 0.09345069, 0.08051137,
       0.07268387, 0.06796414, 0.06541549, 0.03230394])
DEBUG:shap:num_subset_sizes = 9
DEBUG:shap:num_paired_subset_sizes = 8
DEBUG:shap:self.M = 18
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(36.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 641.5335194823491
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 17.820375541176364
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(306.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 483.8853995042659
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.581324834981261
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(1632.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 383.23005660720037
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.23482233860735316
INFO:shap:num_full_subsets = 2
DEBUG:shap

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.5509951348960637,
                0.5345422379478106,
                0.5149933657673595,
                0.5249889429455993,
                0.535249889429456,
                0.5571870853604599,
                0.5461300309597523,
                0.5419725785050863,
                0.558779301194162,
                0.5568332596196375,
                0.5857585139318885,
                0.5375939849624061,
                0.5333038478549315,
                0.5530738611233967,
                0.5191950464396285,
                0.5098186643078284,
                0.4874391862007961,
                0.5
            ],
            "cross_entropy": [
                0.7161174979154652,
                0.7027988544128,
                0.7172513398042779,
                0.7207184500252589,
                0.7142463036523006,
                0.698253570291297,
                0.6950637877345943,
   

  0%|          | 0/214 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.30783758, 0.16353872, 0.1162942 , 0.09345069, 0.08051137,
       0.07268387, 0.06796414, 0.06541549, 0.03230394])
DEBUG:shap:num_subset_sizes = 9
DEBUG:shap:num_paired_subset_sizes = 8
DEBUG:shap:self.M = 18
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(36.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 641.5335194823491
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 17.820375541176364
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(306.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 483.8853995042659
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.581324834981261
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(1632.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 383.23005660720037
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.23482233860735316
INFO:shap:num_full_subsets = 2
DEBUG:shap

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.5866014499082889,
                0.5906192680583456,
                0.5811861297929949,
                0.5445017032055202,
                0.5466853000262032,
                0.5767315922788016,
                0.5856406673071884,
                0.5575159402567911,
                0.5532360904882523,
                0.5699187701982705,
                0.5918857542143419,
                0.6014499082889335,
                0.6062538212944363,
                0.5655079046204908,
                0.5406149008647044,
                0.5247619879465455,
                0.5046728971962617,
                0.5
            ],
            "cross_entropy": [
                0.6750546269328394,
                0.6755881073608002,
                0.6822251536256889,
                0.6898416394417819,
                0.6940584234468913,
                0.6886583248767756,
                0.680390340651411

In [9]:
# Summarise the per-seed scores gathered in `all_scores`: for each method and
# metric, report the mean and the standard deviation across runs.
# np.std is called with its defaults, i.e. the population std (ddof=0).
final_results = {
    method: {
        metric: {'mean': np.mean(values), 'std': np.std(values)}
        for metric, values in all_scores[method].items()
    }
    for method in ('deletion', 'insertion')
}

print("\nFinal Results:")
print(json.dumps(final_results, indent=4))


Final Results:
{
    "deletion": {
        "auroc": {
            "mean": 0.5407941918848767,
            "std": 0.02233118252686752
        },
        "cross_entropy": {
            "mean": 0.6948535109366066,
            "std": 0.006140986860362134
        },
        "brier": {
            "mean": 0.25069153032935826,
            "std": 0.0029652746528941233
        }
    },
    "insertion": {
        "auroc": {
            "mean": 0.6263348546961374,
            "std": 0.01492308295233507
        },
        "cross_entropy": {
            "mean": 0.6766646949925195,
            "std": 0.009090833682949188
        },
        "brier": {
            "mean": 0.2410099516170343,
            "std": 0.004037043695888386
        }
    }
}


### Colorectal Cancer Tree SHAP Experiments

In [3]:
# Tree SHAP experiment: for several train/test split seeds, fit a Random Forest
# on the undersampled cohort, explain the test split with shap.TreeExplainer
# and collect the deletion/insertion scores returned by
# evaluate_global_shap_scores.
data_path = '../../../dataset/colorectalcancers_schs_pgs.xlsx'
df = pd.read_excel(data_path)

df = df.drop(columns=['Follow-up time', 'SERNO'])
df = df.dropna()

X = df.drop(columns=['colorectal cancer'])
y = df['colorectal cancer']

score_methods = ('deletion', 'insertion')
score_metrics = ('auroc', 'cross_entropy', 'brier')

# all_scores[method][metric] accumulates one averaged score per seed.
all_scores = {
    method: {metric: [] for metric in score_metrics}
    for method in score_methods
}

# The undersampler uses a fixed random_state, so its output is identical for
# every seed; compute it once instead of once per iteration.
# NOTE(review): resampling happens before the split, so the test split is
# class-balanced as well - confirm this is the intended evaluation setting.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

seeds = [42, 123, 456, 789, 1010]
for seed in seeds:
    print("Training Random Forest model...")

    # Only the split changes between runs; the forest's own seed stays at 42.
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=seed
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Wrap the scaled arrays back into DataFrames to keep column names and index.
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1
    )
    rf.fit(X_train_scaled, y_train)

    explainer = shap.TreeExplainer(rf)
    shap_values = explainer.shap_values(X_test_scaled)

    result = evaluate_global_shap_scores(rf, X_test_scaled, y_test, shap_values)

    for method in score_methods:
        for metric in score_metrics:
            all_scores[method][metric].append(result[method]["average_scores"][metric])

    print(json.dumps(result, indent=4))



Training Random Forest model...
{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6302631578947369,
                0.606140350877193,
                0.5752631578947368,
                0.5406140350877193,
                0.5599122807017544,
                0.5659649122807018,
                0.5978947368421051,
                0.6121052631578948,
                0.610701754385965,
                0.597280701754386,
                0.5724561403508772,
                0.5583771929824561,
                0.5267543859649123,
                0.5241666666666667,
                0.5343421052631578,
                0.5282017543859648,
                0.5099122807017544,
                0.5
            ],
            "cross_entropy": [
                0.6670223646658081,
                0.6770849565208431,
                0.6850209047216567,
                0.6982962530365298,
                0.6958150450509708,
                0.6903248135832902,
    

In [7]:
# Summarise the per-seed scores gathered in `all_scores`: for each method and
# metric, report the mean and the standard deviation across runs.
# np.std is called with its defaults, i.e. the population std (ddof=0).
final_results = {
    method: {
        metric: {'mean': np.mean(values), 'std': np.std(values)}
        for metric, values in all_scores[method].items()
    }
    for method in ('deletion', 'insertion')
}

print("\nFinal Results:")
print(json.dumps(final_results, indent=4))


Final Results:
{
    "deletion": {
        "auroc": {
            "mean": 0.5394816086921512,
            "std": 0.021389699529343352
        },
        "cross_entropy": {
            "mean": 0.6951038520540607,
            "std": 0.006184570012059086
        },
        "brier": {
            "mean": 0.25081647002018836,
            "std": 0.003005757801739175
        }
    },
    "insertion": {
        "auroc": {
            "mean": 0.6265093545184001,
            "std": 0.015601551687072824
        },
        "cross_entropy": {
            "mean": 0.6765534219670576,
            "std": 0.009482067393342343
        },
        "brier": {
            "mean": 0.240974946736838,
            "std": 0.004183197456195854
        }
    }
}


### Colorectal Cancer Causal SHAP Experiments

In [8]:
import pickle

# Paths and data for the Causal SHAP experiment.
base_dir = '../../../'
result_dir = base_dir + 'result/R/'

data_path = '../../../dataset/colorectalcancers_schs_pgs.xlsx'
df = pd.read_excel(data_path).drop(columns=['Follow-up time', 'SERNO']).dropna()

# Split the cleaned frame into the feature matrix and the target column.
target_column = 'colorectal cancer'
y = df[target_column]
X = df.drop(columns=[target_column])

# One pickled Causal SHAP result file per split seed, in seed order.
file_paths = [
    f'../../../result/CCancer/Causal_SHAP_CCancer_{seed_tag}.pkl'
    for seed_tag in (42, 123, 456, 789, 1010)
]

def load_and_format_shap_values(file_path, feature_names):
    """Load pickled per-instance SHAP values and return them as a 2-D array.

    Args:
        file_path: Path of the pickle file to read. The unpickled object is
            iterated, and each element is queried with ``.get(feature, 0)``,
            so it is expected to be a sequence of dict-like records.
        feature_names: Ordered feature names; they fix the column order of
            the returned array.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(data), len(feature_names))``.
        A feature missing from a record contributes ``0`` in that cell.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at result files produced by a trusted run.
    with open(file_path, 'rb') as handle:
        records = pickle.load(handle)

    # One row per record, one column per requested feature (0 when absent).
    rows = [
        [record.get(name, 0) for name in feature_names]
        for record in records
    ]

    # The explicit reshape keeps the 2-D shape even when there are no records.
    return np.array(rows, dtype=float).reshape(len(records), len(feature_names))

# Load one SHAP matrix per result file, with columns ordered like X's columns.
feature_names = X.columns.tolist()
causal_shap_values = [
    load_and_format_shap_values(pickle_path, feature_names)
    for pickle_path in file_paths
]

In [9]:
# Causal SHAP experiment: for several train/test split seeds, fit a Random
# Forest on the undersampled cohort and score the pre-computed Causal SHAP
# attributions (loaded into `causal_shap_values`) with
# evaluate_global_shap_scores.
seeds = [42, 123, 456, 789, 1010]

score_methods = ('deletion', 'insertion')
score_metrics = ('auroc', 'cross_entropy', 'brier')

# all_scores[method][metric] accumulates one averaged score per seed.
all_scores = {
    method: {metric: [] for metric in score_metrics}
    for method in score_methods
}

# Seeds and SHAP matrices are paired by position; fail loudly on a mismatch
# instead of silently skipping or mis-indexing a run.
assert len(causal_shap_values) == len(seeds), "expected one Causal SHAP matrix per seed"

# The undersampler uses a fixed random_state, so its output is identical for
# every seed; compute it once instead of once per iteration.
# NOTE(review): resampling happens before the split, so the test split is
# class-balanced as well - confirm this is the intended evaluation setting.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

for seed, seed_shap_values in zip(seeds, causal_shap_values):
    print("Training Random Forest model...")

    # Only the split changes between runs; the forest's own seed stays at 42.
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=seed
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Wrap the scaled arrays back into DataFrames to keep column names and index.
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1  # parallel fitting only; matches the other SHAP experiment cells
    )
    rf.fit(X_train_scaled, y_train)

    # Global importance per feature: mean absolute SHAP value over all rows.
    global_importance = pd.Series(np.abs(seed_shap_values).mean(axis=0), index=feature_names)

    result = evaluate_global_shap_scores(rf, X_test_scaled, y_test, global_importance, causal=True)

    for method in score_methods:
        for metric in score_metrics:
            all_scores[method][metric].append(result[method]["average_scores"][metric])

    print(json.dumps(result, indent=4))

Training Random Forest model...
{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6481578947368422,
                0.6242105263157894,
                0.5752631578947368,
                0.5614035087719298,
                0.5646491228070175,
                0.5942105263157895,
                0.6163157894736842,
                0.6042982456140351,
                0.5719298245614035,
                0.5728070175438597,
                0.5654824561403509,
                0.5626754385964913,
                0.5499122807017544,
                0.557938596491228,
                0.5343421052631578,
                0.5282017543859648,
                0.5217543859649124,
                0.5
            ],
            "cross_entropy": [
                0.6556514921159328,
                0.6760014314792019,
                0.6850209047216567,
                0.6875990305385987,
                0.6849462423705724,
                0.6762331660386214,
  

In [10]:
# Summarise the per-seed scores gathered in `all_scores`: for each method and
# metric, report the mean and the standard deviation across runs.
# np.std is called with its defaults, i.e. the population std (ddof=0).
final_results = {
    method: {
        metric: {'mean': np.mean(values), 'std': np.std(values)}
        for metric, values in all_scores[method].items()
    }
    for method in ('deletion', 'insertion')
}

print("\nFinal Results:")
print(json.dumps(final_results, indent=4))


Final Results:
{
    "deletion": {
        "auroc": {
            "mean": 0.5534451966444145,
            "std": 0.00933700153156182
        },
        "cross_entropy": {
            "mean": 0.6910201881251979,
            "std": 0.004460438036571515
        },
        "brier": {
            "mean": 0.24882677264369554,
            "std": 0.002145223900716154
        }
    },
    "insertion": {
        "auroc": {
            "mean": 0.6271642663840172,
            "std": 0.0190426133118931
        },
        "cross_entropy": {
            "mean": 0.6735582963032829,
            "std": 0.009696518565277585
        },
        "brier": {
            "mean": 0.23972168979116243,
            "std": 0.0044048947404250255
        }
    }
}
