In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import RandomUnderSampler
import shap
import numpy as np
from evaluation import calculate_metrics, iterative_feature_deletion_scores, iterative_feature_addition_scores, evaluate_global_shap_scores

import warnings
warnings.filterwarnings("ignore")



DEBUG:matplotlib:matplotlib data path: c:\Users\snorl\Desktop\FYP\venv\Lib\site-packages\matplotlib\mpl-data
DEBUG:matplotlib:CONFIGDIR=C:\Users\snorl\.matplotlib
DEBUG:matplotlib:interactive is False
DEBUG:matplotlib:platform is win32
DEBUG:matplotlib:CACHEDIR=C:\Users\snorl\.matplotlib
DEBUG:matplotlib.font_manager:Using fontManager instance from C:\Users\snorl\.matplotlib\fontlist-v390.json


#### Try Oversampling

In [40]:
# Oversampling experiment: train a random forest on a SMOTE-balanced training
# split and evaluate it on the untouched, stratified test split.
# Local import so this cell stays runnable on its own.
from imblearn.pipeline import Pipeline as ImbPipeline

data_path = '../../../dataset/lungcancerdataset.csv'
df = pd.read_csv(data_path)
df = df.drop(columns=['followup-time', 'serno'])
# Every row containing any missing value is discarded in this experiment.
df = df.dropna()

X = df.drop(columns=['lung cancer'])
y = df['lung cancer']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The scaler is fitted on the training split only and reused for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Only the training split is oversampled; the test split keeps its real class ratio.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# Hyperparameters are kept in one place so the CV pipeline below uses the same model.
rf_params = dict(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)
rf = RandomForestClassifier(**rf_params)

rf.fit(X_train_resampled, y_train_resampled)

y_pred = rf.predict(X_test_scaled)
y_pred_proba = rf.predict_proba(X_test_scaled)[:, 1]

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nROC AUC Score:", roc_auc_score(y_test, y_pred_proba))

feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': rf.feature_importances_
})
feature_importance = feature_importance.sort_values('importance', ascending=False)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10))

# Cross-validate on the ORIGINAL training split with scaling and SMOTE inside the
# pipeline. Cross-validating the already-resampled matrix would put synthetic
# points derived from a fold's validation rows into the training folds, and the
# validation folds would themselves be artificially balanced, which inflates the
# reported AUC. Inside the pipeline, resampling is applied to training folds only.
cv_pipeline = ImbPipeline(steps=[
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42)),
    ('rf', RandomForestClassifier(**rf_params)),
])
cv_scores = cross_val_score(
    cv_pipeline, X_train, y_train,
    cv=5, scoring='roc_auc'
)
print("\nCross-validation ROC AUC scores:", cv_scores)
print("Mean CV Score:", cv_scores.mean())
print("CV Score Std:", cv_scores.std())


Classification Report:
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98      3428
         1.0       0.04      0.05      0.04        65

    accuracy                           0.96      3493
   macro avg       0.51      0.51      0.51      3493
weighted avg       0.96      0.96      0.96      3493


Confusion Matrix:
[[3348   80]
 [  62    3]]

ROC AUC Score: 0.7164258145588367

Top 10 Most Important Features:
                            feature  importance
7   smoke_never(0)_ex(1)_current(2)    0.158827
11                             amed    0.122612
4                     age_interview    0.103831
3                               sex    0.069159
12                             dash    0.067220
19                        pgs000721    0.056407
18                        pgs000070    0.048630
14                              DBP    0.047128
13                              SBP    0.044395
1                               pc2    0.042384

Cross-

#### Try Downsampling

In [2]:
import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Downsampling experiment: train a random forest on a randomly undersampled
# training split and evaluate it on the untouched, stratified test split.
data_path = '../../../dataset/lungcancerdataset.csv'
df = (
    pd.read_csv(data_path)
    .drop(columns=['serno', 'followup-time'])
    # Only rows without a label are removed; missing feature values are kept.
    .dropna(subset=['lung cancer'])
)

y = df['lung cancer']
X = df.drop(columns=['lung cancer'])

print("Original class distribution:")
print(y.value_counts())

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scaling statistics come from the training split only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Balance the classes of the training split by random undersampling.
rus = RandomUnderSampler(random_state=42)
X_train_resampled, y_train_resampled = rus.fit_resample(X_train_scaled, y_train)

print("\nResampled class distribution:")
print(pd.Series(y_train_resampled).value_counts())

forest_kwargs = {
    'n_estimators': 200,
    'max_depth': 20,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'random_state': 42,
    'n_jobs': -1,
}
rf = RandomForestClassifier(**forest_kwargs)
rf.fit(X_train_resampled, y_train_resampled)

# Hard predictions plus the probability of the second class (column 1).
y_pred = rf.predict(X_test_scaled)
y_pred_proba = rf.predict_proba(X_test_scaled)[:, 1]

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nROC AUC Score:", roc_auc_score(y_test, y_pred_proba))

# Impurity-based importances paired with the original column names, largest first.
feature_importance = pd.DataFrame(
    {'feature': X.columns, 'importance': rf.feature_importances_}
).sort_values('importance', ascending=False)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10))

# 5-fold ROC AUC on the undersampled training data.
cv_scores = cross_val_score(
    rf,
    X_train_resampled,
    y_train_resampled,
    cv=5,
    scoring='roc_auc',
)
print("\nCross-validation ROC AUC scores:", cv_scores)
print("Mean CV Score:", cv_scores.mean())
print("CV Score Std:", cv_scores.std())

Original class distribution:
lung cancer
0.0    22452
1.0      449
Name: count, dtype: int64

Resampled class distribution:
lung cancer
0.0    359
1.0    359
Name: count, dtype: int64

Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      0.71      0.82      4491
         1.0       0.04      0.64      0.08        90

    accuracy                           0.70      4581
   macro avg       0.52      0.68      0.45      4581
weighted avg       0.97      0.70      0.81      4581


Confusion Matrix:
[[3170 1321]
 [  32   58]]

ROC AUC Score: 0.7394356614463495

Top 10 Most Important Features:
                            feature  importance
4                     age_interview    0.127238
7   smoke_never(0)_ex(1)_current(2)    0.113224
19                        pgs000721    0.079644
18                        pgs000070    0.071812
6                   telomere length    0.068589
10                    ahei2010score    0.062960
5              

#### Output Probabilities

In [42]:
# Export the model's class probabilities for every row of X to an Excel file.
# NOTE(review): X contains the rows the model was trained on as well as the
# held-out rows, so these probabilities are not an out-of-sample estimate.
output_path = 'lung_cancer_predicted_probabilities.xlsx'

# `rf` was fitted on StandardScaler output, so X has to pass through the same
# fitted `scaler` before prediction; feeding raw feature values to the forest
# compares them against split thresholds learned on standardized values.
# NOTE(review): relies on `scaler` and `rf` coming from the same training cell - confirm.
X_scaled_full = scaler.transform(X)
predicted_probabilities = rf.predict_proba(X_scaled_full)

# One column per predicted class, named by column position.
prob_df = pd.DataFrame(
    predicted_probabilities,
    columns=[f'Prob_Class_{i}' for i in range(predicted_probabilities.shape[1])],
)

# Both frames are re-indexed from 0 so the column-wise concat aligns row by row.
result_df = pd.concat([X.reset_index(drop=True), prob_df.reset_index(drop=True)], axis=1)

result_df.to_excel(output_path, index=False)

# Report the file that was actually written (the old message named a different file).
print(f"Predicted probabilities exported successfully to '{output_path}'.")


Predicted probabilities exported successfully to 'lung_cancer_probabilities.xlsx'.


### Lung Cancer Kernel SHAP Experiments

In [None]:
# Kernel SHAP experiment: for several train/test split seeds, fit a random forest
# on the undersampled data, explain the test split with KernelExplainer and score
# the explanation with the project's deletion / insertion evaluation.
import json

data_path = '../../../dataset/lungcancerdataset.csv'
df = pd.read_csv(data_path)

df = df.drop(columns=['serno', 'followup-time'])
# Only rows without a label are removed (a stray trailing character on this
# line previously made the whole cell a SyntaxError).
df = df.dropna(subset=['lung cancer'])

X = df.drop(columns=['lung cancer'])
y = df['lung cancer']

# Per-seed average scores, filled in by the loop below.
all_scores = {
    'deletion': {
        'auroc': [],
        'cross_entropy': [],
        'brier': []
    },
    'insertion': {
        'auroc': [],
        'cross_entropy': [],
        'brier': []
    }
}

# The undersampler has a fixed seed and always receives the same (X, y), so its
# result is identical for every iteration; compute it once instead of per seed.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

seeds = [42, 123, 456, 789, 1010]
for seed in seeds:
    print("Training Random Forest model...")

    # Only the train/test partition changes between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=seed
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Wrap the scaled arrays back into DataFrames to keep column names and row index.
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1
    )
    rf.fit(X_train_scaled, y_train)

    # Summarise the training data with 100 k-means centroids as the SHAP background set.
    background_data = shap.kmeans(X_train_scaled, 100)
    explainer = shap.KernelExplainer(rf.predict_proba, background_data)
    shap_values = explainer.shap_values(X_test_scaled)

    # NOTE(review): evaluate_global_shap_scores comes from the project's
    # `evaluation` module; the result layout used below is inferred from this cell.
    result = evaluate_global_shap_scores(rf, X_test_scaled, y_test, shap_values)

    for method in ['deletion', 'insertion']:
        for metric in ['auroc', 'cross_entropy', 'brier']:
            all_scores[method][metric].append(result[method]["average_scores"][metric])

    print(json.dumps(result, indent=4))



Training Random Forest model...


  0%|          | 0/180 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.29670485, 0.15659423, 0.1105371 , 0.08808425, 0.07516523,
       0.06711181, 0.06194937, 0.05872284, 0.05694336, 0.02818696])
DEBUG:shap:num_subset_sizes = 10
DEBUG:shap:num_paired_subset_sizes = 9
DEBUG:shap:self.M = 20
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(40.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 619.5197362964481
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 15.487993407411201
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(380.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 456.00340430954986
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.2000089587093417
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(2280.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 337.25183485530096
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.14791747142776357
INFO:shap:num_full_subsets

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6402476780185758,
                0.5954179566563468,
                0.5585139318885449,
                0.5087306501547988,
                0.5157894736842105,
                0.494984520123839,
                0.496594427244582,
                0.4629721362229102,
                0.4593808049535604,
                0.47343653250774,
                0.49486068111455106,
                0.5072445820433438,
                0.5023529411764707,
                0.47845201238390095,
                0.4643962848297213,
                0.45077399380804956,
                0.45052631578947366,
                0.4637151702786378,
                0.5021671826625387,
                0.5
            ],
            "cross_entropy": [
                0.6786708688134294,
                0.7118942096265851,
                0.7479030320980107,
                0.7786088066971775,
                0.781132800757002

  0%|          | 0/180 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.29670485, 0.15659423, 0.1105371 , 0.08808425, 0.07516523,
       0.06711181, 0.06194937, 0.05872284, 0.05694336, 0.02818696])
DEBUG:shap:num_subset_sizes = 10
DEBUG:shap:num_paired_subset_sizes = 9
DEBUG:shap:self.M = 20
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(40.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 619.5197362964481
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 15.487993407411201
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(380.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 456.00340430954986
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.2000089587093417
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(2280.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 337.25183485530096
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.14791747142776357
INFO:shap:num_full_subsets

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.7037037037037036,
                0.6165357276468387,
                0.5326100511285696,
                0.47661803217358767,
                0.5101633620152137,
                0.4909589724404539,
                0.4914577877540841,
                0.5186432223469261,
                0.5599201895498191,
                0.5539344057862576,
                0.595959595959596,
                0.5665918443696221,
                0.5483850854221225,
                0.5244419503678762,
                0.4913954358398803,
                0.48690609801720913,
                0.5087292679885272,
                0.5097268986157876,
                0.5117845117845117,
                0.5
            ],
            "cross_entropy": [
                0.6481943684155614,
                0.7054580541262163,
                0.7480209983751097,
                0.7699702175552251,
                0.74411276086079

  0%|          | 0/180 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.29670485, 0.15659423, 0.1105371 , 0.08808425, 0.07516523,
       0.06711181, 0.06194937, 0.05872284, 0.05694336, 0.02818696])
DEBUG:shap:num_subset_sizes = 10
DEBUG:shap:num_paired_subset_sizes = 9
DEBUG:shap:self.M = 20
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(40.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 619.5197362964481
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 15.487993407411201
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(380.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 456.00340430954986
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.2000089587093417
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(2280.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 337.25183485530096
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.14791747142776357
INFO:shap:num_full_subsets

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6687406669985068,
                0.5915878546540566,
                0.5335988053758088,
                0.5434295669487307,
                0.5220258835241414,
                0.523768043802887,
                0.537954206072673,
                0.533038825286212,
                0.5492782478845196,
                0.5496515679442509,
                0.5485316077650573,
                0.5196615231458437,
                0.4805873568939771,
                0.5010577401692384,
                0.5242658038825285,
                0.5150572424091587,
                0.5128173220507716,
                0.5264435042309606,
                0.5286212045793928,
                0.5
            ],
            "cross_entropy": [
                0.6643925756760478,
                0.7297134015048815,
                0.7805556245256895,
                0.7938120019662562,
                0.8053961651062906,


  0%|          | 0/180 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.29670485, 0.15659423, 0.1105371 , 0.08808425, 0.07516523,
       0.06711181, 0.06194937, 0.05872284, 0.05694336, 0.02818696])
DEBUG:shap:num_subset_sizes = 10
DEBUG:shap:num_paired_subset_sizes = 9
DEBUG:shap:self.M = 20
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(40.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 619.5197362964481
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 15.487993407411201
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(380.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 456.00340430954986
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.2000089587093417
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(2280.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 337.25183485530096
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.14791747142776357
INFO:shap:num_full_subsets

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.7055024220593714,
                0.5867594087690969,
                0.5921003602037014,
                0.5564526145820394,
                0.51061979878276,
                0.5286299838529375,
                0.5410508011427152,
                0.5283815675071419,
                0.5109924233014532,
                0.4806856291143958,
                0.5000621040864489,
                0.5031673084088932,
                0.482735063967209,
                0.4859023723761023,
                0.49602533846727115,
                0.481120357719538,
                0.4863371009812446,
                0.5057756800397466,
                0.5009315612967333,
                0.5
            ],
            "cross_entropy": [
                0.634704019631134,
                0.6854478295079618,
                0.6805119450200919,
                0.7005369271344505,
                0.7136637456261122,
 

  0%|          | 0/180 [00:00<?, ?it/s]DEBUG:shap:weight_vector = array([0.29670485, 0.15659423, 0.1105371 , 0.08808425, 0.07516523,
       0.06711181, 0.06194937, 0.05872284, 0.05694336, 0.02818696])
DEBUG:shap:num_subset_sizes = 10
DEBUG:shap:num_paired_subset_sizes = 9
DEBUG:shap:self.M = 20
DEBUG:shap:subset_size = 1
DEBUG:shap:nsubsets = np.float64(40.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 619.5197362964481
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 15.487993407411201
DEBUG:shap:subset_size = 2
DEBUG:shap:nsubsets = np.float64(380.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 456.00340430954986
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 1.2000089587093417
DEBUG:shap:subset_size = 3
DEBUG:shap:nsubsets = np.float64(2280.0)
DEBUG:shap:self.nsamples*weight_vector[subset_size-1] = 337.25183485530096
DEBUG:shap:self.nsamples*weight_vector[subset_size-1]/nsubsets = 0.14791747142776357
INFO:shap:num_full_subsets

{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6496783770410689,
                0.5982187036120732,
                0.572365165759525,
                0.5032162295893121,
                0.49628896585848586,
                0.5061850569025235,
                0.5048243443839684,
                0.49863928748144487,
                0.5056902523503216,
                0.5259772389905987,
                0.5778080158337456,
                0.5780554181098467,
                0.5411306284017813,
                0.566056407718951,
                0.46975507174666015,
                0.4335724888668976,
                0.43307768431469573,
                0.45478723404255317,
                0.5072983671449778,
                0.5
            ],
            "cross_entropy": [
                0.6574692798448797,
                0.6892900824476428,
                0.7012296140490621,
                0.7299112248299158,
                0.739150441861

In [14]:
# Collapse the per-seed scores collected in `all_scores` into a mean and a
# standard deviation for every (method, metric) pair and print them as JSON.
# np.std is called with its defaults.
final_results = {}
for method in ('deletion', 'insertion'):
    summary = {}
    for metric, scores in all_scores[method].items():
        summary[metric] = {'mean': np.mean(scores), 'std': np.std(scores)}
    final_results[method] = summary

print("\nFinal Results:")
print(json.dumps(final_results, indent=4))


Final Results:
{
    "deletion": {
        "auroc": {
            "mean": 0.5233499722687933,
            "std": 0.012534475756467404
        },
        "cross_entropy": {
            "mean": 0.8165854370686214,
            "std": 0.07136103236795237
        },
        "brier": {
            "mean": 0.3011165997430997,
            "std": 0.02745148860331771
        }
    },
    "insertion": {
        "auroc": {
            "mean": 0.7105614593862124,
            "std": 0.02151020152836636
        },
        "cross_entropy": {
            "mean": 0.6378104413465389,
            "std": 0.023763966264979647
        },
        "brier": {
            "mean": 0.22078371005920072,
            "std": 0.009023460805195999
        }
    }
}


### Lung Cancer Tree SHAP Experiments

In [48]:
import warnings
warnings.filterwarnings("ignore")

# Tree SHAP experiment: same protocol as the Kernel SHAP cell, but the test split
# is explained with shap.TreeExplainer.
import json

data_path = '../../../dataset/lungcancerdataset.csv'
df = pd.read_csv(data_path)

df = df.drop(columns=['serno', 'followup-time'])
# Only rows without a label are removed; missing feature values are kept.
df = df.dropna(subset=['lung cancer'])

X = df.drop(columns=['lung cancer'])
y = df['lung cancer']

# Per-seed average scores, filled in by the loop below.
all_scores = {
    'deletion': {
        'auroc': [],
        'cross_entropy': [],
        'brier': []
    },
    'insertion': {
        'auroc': [],
        'cross_entropy': [],
        'brier': []
    }
}

# The undersampler has a fixed seed and always receives the same (X, y), so its
# result is identical for every iteration; compute it once instead of per seed.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

seeds = [42, 123, 456, 789, 1010]
for seed in seeds:
    print("Training Random Forest model...")

    # Only the train/test partition changes between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=seed
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Wrap the scaled arrays back into DataFrames to keep column names and row index.
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1
    )
    rf.fit(X_train_scaled, y_train)

    explainer = shap.TreeExplainer(rf)
    shap_values = explainer.shap_values(X_test_scaled)

    # NOTE(review): evaluate_global_shap_scores comes from the project's
    # `evaluation` module; the result layout used below is inferred from this cell.
    result = evaluate_global_shap_scores(rf, X_test_scaled, y_test, shap_values)

    for method in ['deletion', 'insertion']:
        for metric in ['auroc', 'cross_entropy', 'brier']:
            all_scores[method][metric].append(result[method]["average_scores"][metric])

    print(json.dumps(result, indent=4))


Training Random Forest model...
{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6402476780185758,
                0.5954179566563468,
                0.5585139318885449,
                0.5087306501547988,
                0.5157894736842105,
                0.494984520123839,
                0.496594427244582,
                0.4629721362229102,
                0.4593188854489164,
                0.47343653250774,
                0.49486068111455106,
                0.5072445820433438,
                0.5023529411764707,
                0.47845201238390095,
                0.4643962848297213,
                0.45077399380804956,
                0.45052631578947366,
                0.4637151702786378,
                0.5021671826625387,
                0.5
            ],
            "cross_entropy": [
                0.6786708688134294,
                0.7118942096265853,
                0.7479030320980107,
                0.7786088066971774,
 

In [49]:
# Summarise the Tree SHAP runs: mean and standard deviation of every metric
# across seeds, for both the deletion and the insertion evaluation.
# np.std is called with its defaults.
final_results = {}
for method in ('deletion', 'insertion'):
    summary = {}
    for metric, scores in all_scores[method].items():
        summary[metric] = {'mean': np.mean(scores), 'std': np.std(scores)}
    final_results[method] = summary

print("\nFinal Results:")
print(json.dumps(final_results, indent=4))


Final Results:
{
    "deletion": {
        "auroc": {
            "mean": 0.5227779228151659,
            "std": 0.012895156888134189
        },
        "cross_entropy": {
            "mean": 0.8172644326325438,
            "std": 0.07040396355783948
        },
        "brier": {
            "mean": 0.3014247694668432,
            "std": 0.02706566319093341
        }
    },
    "insertion": {
        "auroc": {
            "mean": 0.7110261089958968,
            "std": 0.021575733651158464
        },
        "cross_entropy": {
            "mean": 0.6371234158182337,
            "std": 0.024105106260111395
        },
        "brier": {
            "mean": 0.22058083527859665,
            "std": 0.00909575311319429
        }
    }
}


### Lung Cancer Causal SHAP Experiment

In [30]:
import pickle

# Reload the cohort (identifier / follow-up columns removed, unlabeled rows
# dropped) and list the pickled Causal SHAP result files to load.
data_path = '../../../dataset/lungcancerdataset.csv'
df = (
    pd.read_csv(data_path)
    .drop(columns=['serno', 'followup-time'])
    .dropna(subset=['lung cancer'])
)

y = df['lung cancer']
X = df.drop(columns=['lung cancer'])

# One result file per split seed; the order here must match the order of `seeds`
# in the evaluation cell, which pairs files and seeds by position.
file_paths = [
    '../../../result/LCancer/Causal_SHAP_LCancer_42.pkl',
    '../../../result/LCancer/Causal_SHAP_LCancer_123.pkl',
    '../../../result/LCancer/Causal_SHAP_LCancer_456.pkl',
    '../../../result/LCancer/Causal_SHAP_LCancer_789.pkl',
    '../../../result/LCancer/Causal_SHAP_LCancer_1010.pkl',
]

def load_and_format_shap_values(file_path, feature_names):
    """Load pickled per-instance SHAP dicts and return them as a 2-D array.

    Args:
        file_path: Path of a pickle file holding a sequence of mappings, one per
            instance, from feature name to SHAP value.
        feature_names: Feature names defining the column order of the result.

    Returns:
        A float numpy array of shape (number of instances, len(feature_names)).
        A feature missing from an instance's mapping is filled with 0; keys
        that are not in `feature_names` are ignored.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(file_path, 'rb') as handle:
        records = pickle.load(handle)

    matrix = np.zeros((len(records), len(feature_names)))
    for row, record in enumerate(records):
        # Fill one whole row at a time, in the requested feature order.
        matrix[row] = [record.get(name, 0) for name in feature_names]

    return matrix

# Load every Causal SHAP result file into an (instances x features) array whose
# columns follow the column order of X.
feature_names = list(X.columns)
causal_shap_values = [
    load_and_format_shap_values(path, feature_names) for path in file_paths
]

In [42]:
# Causal SHAP evaluation: for each split seed, refit the random forest and score
# the precomputed Causal SHAP values with the deletion / insertion evaluation.
import json

seeds = [42, 123, 456, 789, 1010]
# Rerun 42, 123
all_scores = {
    'deletion': {
        'auroc': [],
        'cross_entropy': [],
        'brier': []
    },
    'insertion': {
        'auroc': [],
        'cross_entropy': [],
        'brier': []
    }
}

# Seeds and precomputed SHAP arrays are paired by position, so a length mismatch
# would silently mis-assign or skip runs; fail early instead.
if len(causal_shap_values) != len(seeds):
    raise ValueError(
        f"Expected {len(seeds)} Causal SHAP result sets, got {len(causal_shap_values)}"
    )

# The undersampler has a fixed seed and always receives the same (X, y), so its
# result is identical for every iteration; compute it once instead of per seed.
rus = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = rus.fit_resample(X, y)

for seed, seed_shap_values in zip(seeds, causal_shap_values):
    print("Training Random Forest model...")

    # Only the train/test partition changes between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=seed
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Wrap the scaled arrays back into DataFrames to keep column names and row index.
    X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
    X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42
    )
    rf.fit(X_train_scaled, y_train)

    # Global importance per feature: mean absolute SHAP value over all instances.
    global_importance = pd.Series(
        np.abs(seed_shap_values).mean(axis=0), index=feature_names
    )

    # NOTE(review): evaluate_global_shap_scores comes from the project's
    # `evaluation` module; the meaning of causal=True is not visible here.
    result = evaluate_global_shap_scores(rf, X_test_scaled, y_test, global_importance, causal=True)

    for method in ['deletion', 'insertion']:
        for metric in ['auroc', 'cross_entropy', 'brier']:
            all_scores[method][metric].append(result[method]["average_scores"][metric])

    print(json.dumps(result, indent=4))

Training Random Forest model...
{
    "deletion": {
        "stepwise_metrics": {
            "auroc": [
                0.6402476780185758,
                0.5947987616099072,
                0.5826625386996904,
                0.5705263157894738,
                0.48445820433436537,
                0.47653250773993805,
                0.5009287925696595,
                0.4974613003095975,
                0.48953560371517035,
                0.4921362229102167,
                0.488421052631579,
                0.49578947368421056,
                0.48705882352941177,
                0.4989473684210527,
                0.4761609907120743,
                0.46705882352941175,
                0.46476780185758515,
                0.48996904024767796,
                0.48736842105263156,
                0.5
            ],
            "cross_entropy": [
                0.6786708688134294,
                0.71430027034049,
                0.7350764521306314,
                0.7406257408378

In [43]:
# Summarise the Causal SHAP runs: mean and standard deviation of every metric
# across seeds, for both the deletion and the insertion evaluation.
# np.std is called with its defaults.
final_results = {}
for method in ('deletion', 'insertion'):
    summary = {}
    for metric, scores in all_scores[method].items():
        summary[metric] = {'mean': np.mean(scores), 'std': np.std(scores)}
    final_results[method] = summary

print("\nFinal Results:")
print(json.dumps(final_results, indent=4))


Final Results:
{
    "deletion": {
        "auroc": {
            "mean": 0.5359552830477059,
            "std": 0.01388036276694231
        },
        "cross_entropy": {
            "mean": 0.7920336329820777,
            "std": 0.05997208637873952
        },
        "brier": {
            "mean": 0.2914056298603807,
            "std": 0.023979744941177116
        }
    },
    "insertion": {
        "auroc": {
            "mean": 0.7063550110808146,
            "std": 0.01738552559597206
        },
        "cross_entropy": {
            "mean": 0.6416731594227022,
            "std": 0.022371111921692274
        },
        "brier": {
            "mean": 0.22238536399948047,
            "std": 0.008147511746204643
        }
    }
}
