In [1]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split # type: ignore
import matplotlib.pyplot as plt # type: ignore

In [2]:
# Report where the kernel was started, then switch to the project root so the
# relative paths used in later cells ("Scripts/...", "Data/...",
# "model_outputs/...") resolve from one place.
import os
cwd = os.getcwd()
print("Current working directory:", cwd)

# The original network-drive location stays the default. Set the
# LIFECOURSE_PROJECT_ROOT environment variable to run the notebook from a
# different checkout without editing this cell.
project_root = os.environ.get(
    "LIFECOURSE_PROJECT_ROOT",
    "n:/Incubator2025_ComputationalLifeCourse",
)
os.chdir(project_root)

Current working directory: n:\Incubator2025_ComputationalLifeCourse\Scripts\g_comp


In [3]:
import sys

# Make the project's helper module importable. The path is relative to the
# current working directory, so the working directory must already be the
# project root when this cell runs.
if "Scripts/g_comp" not in sys.path:  # do not stack duplicates when the cell is re-run
    sys.path.append("Scripts/g_comp")

# NOTE: in this notebook the alias `gc` refers to the project's g_comp module,
# not the standard-library garbage-collector module of the same name.
import g_comp as gc

In [4]:
# Reload the module (if it's been edited and needs to be reloaded)
# `gc` is the alias bound to g_comp by the previous import cell. As the last
# expression of the cell, the reloaded module object is echoed as cell output.
import importlib
importlib.reload(gc)

<module 'g_comp' from 'n:\\Incubator2025_ComputationalLifeCourse\\Scripts/g_comp\\g_comp.py'>

In [5]:
def set_seed(seed=42):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator, PyTorch's
    generator and every CUDA device generator with the same value, then sets
    cuDNN to deterministic mode with benchmarking switched off.

    Args:
        seed: Integer seed passed to each generator (defaults to 42).
    """
    # Same seed, same order: stdlib -> NumPy -> PyTorch -> PyTorch CUDA (all devices)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN flags: deterministic mode on, benchmarking off
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Seed everything once, before any data handling or model fitting
set_seed(2025)

In [6]:
## Load the data
import pandas as pd

# Read the processed file and order the rows by person (mergeid), then by age (t_age)
df = pd.read_csv("Data/Processed/g_data.csv").sort_values(["mergeid", "t_age"])

In [7]:
## Check the number of dropped cases
# The helper prints how many mergeids there are, how many are complete on the
# listed columns, and how many are dropped. The returned value is passed to
# gc.get_valid_df in the data-prep cell.
complete_y_adl_65_75_dic_mergeids = gc.summarize_mergeid_completeness(
    df,
    ["y_adl_65_75_dic", "dt_n_years_disease_dic"],
    "ADL 65–75",
)

[ADL 65–75]
Original mergeids: 20806
Complete mergeids (no missing values in Y): 14958
Number of unique mergeids dropped: 5848



In [8]:
###############
## Data prep ##
###############

# N = number of individuals, T = number of time points (i.e., 33)
T = 33

# Analysis sample, then one subset per welfare regime (rows whose regime dummy equals 1.0)
df_adl_65_75 = gc.get_valid_df(
    df,
    complete_y_adl_65_75_dic_mergeids,
    "ADL 65–75",
    T,
)
df_adl_med_65_75 = df_adl_65_75[
    df_adl_65_75["mod_welfare_regime_mediterranean"] == 1.0
]
df_adl_cor_65_75 = df_adl_65_75[
    df_adl_65_75["mod_welfare_regime_corporatist"] == 1.0
]
df_adl_scan_65_75 = df_adl_65_75[
    df_adl_65_75["mod_welfare_regime_scandinavian"] == 1.0
]

# Feature columns per regime: the "outcome ..." context feeds the ADL model and
# the "tv_covar ..." context feeds the disease (time-varying covariate) model.
# Each call logs the columns it excludes.

# -- Mediterranean
feature_cols_outcome_med_65_75 = gc.get_feature_cols(
    df_adl_med_65_75, context="outcome and med"
)
feature_cols_tv_covar_med_65_75 = gc.get_feature_cols(
    df_adl_med_65_75, context="tv_covar and med"
)

# -- Corporatist
feature_cols_outcome_cor_65_75 = gc.get_feature_cols(
    df_adl_cor_65_75, context="outcome and cor"
)
feature_cols_tv_covar_cor_65_75 = gc.get_feature_cols(
    df_adl_cor_65_75, context="tv_covar and cor"
)

# -- Scandinavian
feature_cols_outcome_scan_65_75 = gc.get_feature_cols(
    df_adl_scan_65_75, context="outcome and scan"
)
feature_cols_tv_covar_scan_65_75 = gc.get_feature_cols(
    df_adl_scan_65_75, context="tv_covar and scan"
)

# Convert to (N, T, D) tensors: treatment and covariates from the dataset,
# once with the outcome feature set and once with the tv_covar feature set.

# -- Mediterranean
N_adl_med_65_75 = df_adl_med_65_75['mergeid'].nunique()
X_adl_med_65_75 = gc.convert_df_to_X(
    df_adl_med_65_75, feature_cols_outcome_med_65_75, N_adl_med_65_75, T
)
X_disease_med_65_75 = gc.convert_df_to_X(
    df_adl_med_65_75, feature_cols_tv_covar_med_65_75, N_adl_med_65_75, T
)

# -- Corporatist
N_adl_cor_65_75 = df_adl_cor_65_75['mergeid'].nunique()
X_adl_cor_65_75 = gc.convert_df_to_X(
    df_adl_cor_65_75, feature_cols_outcome_cor_65_75, N_adl_cor_65_75, T
)
X_disease_cor_65_75 = gc.convert_df_to_X(
    df_adl_cor_65_75, feature_cols_tv_covar_cor_65_75, N_adl_cor_65_75, T
)

# -- Scandinavian
N_adl_scan_65_75 = df_adl_scan_65_75['mergeid'].nunique()
X_adl_scan_65_75 = gc.convert_df_to_X(
    df_adl_scan_65_75, feature_cols_outcome_scan_65_75, N_adl_scan_65_75, T
)
X_disease_scan_65_75 = gc.convert_df_to_X(
    df_adl_scan_65_75, feature_cols_tv_covar_scan_65_75, N_adl_scan_65_75, T
)

[ADL 65–75]
Mergeids with complete outcome and 33 unique ages: 14958
[get_feature_cols] Context: outcome and med | Exclude: ['mod_country_italy']
⚠️ dt_n_years_disease_dic IS STILL INCLUDED
[get_feature_cols] Context: tv_covar and med | Exclude: ['mod_country_italy', 'dt_n_years_disease_dic']
✅ dt_n_years_disease_dic successfully excluded
[get_feature_cols] Context: outcome and cor | Exclude: ['mod_country_germany']
⚠️ dt_n_years_disease_dic IS STILL INCLUDED
[get_feature_cols] Context: tv_covar and cor | Exclude: ['mod_country_germany', 'dt_n_years_disease_dic']
✅ dt_n_years_disease_dic successfully excluded
[get_feature_cols] Context: outcome and scan | Exclude: ['mod_country_sweden']
⚠️ dt_n_years_disease_dic IS STILL INCLUDED
[get_feature_cols] Context: tv_covar and scan | Exclude: ['mod_country_sweden', 'dt_n_years_disease_dic']
✅ dt_n_years_disease_dic successfully excluded


In [9]:
###############
## Y and L_t ##
###############

# For every welfare regime, pull two binary targets from that regime's frame:
#   y_adl_*     - the ADL outcome column "y_adl_65_75_dic"
#   y_disease_* - the time-varying covariate column "dt_n_years_disease_dic"

# -- Mediterranean
y_adl_med_65_75_dic = gc.extract_y_tensor(df_adl_med_65_75, "y_adl_65_75_dic")
y_disease_med_65_75_dic = gc.extract_y_tensor(df_adl_med_65_75, "dt_n_years_disease_dic")

# -- Corporatist
y_adl_cor_65_75_dic = gc.extract_y_tensor(df_adl_cor_65_75, "y_adl_65_75_dic")
y_disease_cor_65_75_dic = gc.extract_y_tensor(df_adl_cor_65_75, "dt_n_years_disease_dic")

# -- Scandinavian
y_adl_scan_65_75_dic = gc.extract_y_tensor(df_adl_scan_65_75, "y_adl_65_75_dic")
y_disease_scan_65_75_dic = gc.extract_y_tensor(df_adl_scan_65_75, "dt_n_years_disease_dic")

In [10]:
#####################
## \vec{a} and L_0 ##
#####################

# Collapse each (N, T, D) tensor into high-level treatment-trajectory features
# plus covariates. The ADL matrices use the outcome feature set, the disease
# matrices use the tv_covar feature set.

# -- Mediterranean
X_med_features_adl_65_75_dic = gc.extract_features(
    X_adl_med_65_75, feature_cols_outcome_med_65_75
)
X_med_features_disease_65_75_dic = gc.extract_features(
    X_disease_med_65_75, feature_cols_tv_covar_med_65_75
)

# -- Corporatist
X_cor_features_adl_65_75_dic = gc.extract_features(
    X_adl_cor_65_75, feature_cols_outcome_cor_65_75
)
X_cor_features_disease_65_75_dic = gc.extract_features(
    X_disease_cor_65_75, feature_cols_tv_covar_cor_65_75
)

# -- Scandinavian
X_scan_features_adl_65_75_dic = gc.extract_features(
    X_adl_scan_65_75, feature_cols_outcome_scan_65_75
)
X_scan_features_disease_65_75_dic = gc.extract_features(
    X_disease_scan_65_75, feature_cols_tv_covar_scan_65_75
)

# Named feature frames for the ADL matrices. Each call prints the frame's
# shape, and the frames are passed to gc.logistic_coef_summary further down.
trt_features_med_adl_65_75_dic = gc.create_feature_df(
    X_med_features_adl_65_75_dic, feature_cols_outcome_med_65_75, "adl 65–75"
)
trt_features_cor_adl_65_75_dic = gc.create_feature_df(
    X_cor_features_adl_65_75_dic, feature_cols_outcome_cor_65_75, "adl 65–75"
)
trt_features_scan_adl_65_75_dic = gc.create_feature_df(
    X_scan_features_adl_65_75_dic, feature_cols_outcome_scan_65_75, "adl 65–75"
)


adl 65–75: Feature dataframe shape = (4706, 48)
adl 65–75: Feature dataframe shape = (7381, 48)
adl 65–75: Feature dataframe shape = (2871, 48)


In [11]:
###################
## Linear models ##
###################

# Baseline logistic models: one for the ADL outcome and one for the disease
# covariate in each welfare regime. Every call prints train/test accuracy and
# test AUC under its label and receives the regime's output directory.

# -- Mediterranean
y_adl_med_65_75_dic_logistic = gc.train_and_evaluate_logistic(
    X_med_features_adl_65_75_dic,
    y_adl_med_65_75_dic,
    "adl 65–75 (med, logistic)",
    save_dir="model_outputs/adl/med",
)
y_disease_med_65_75_dic_logistic = gc.train_and_evaluate_logistic(
    X_med_features_disease_65_75_dic,
    y_disease_med_65_75_dic,
    "disease 65–75 (med, logistic)",
    save_dir="model_outputs/adl/med",
)

# -- Corporatist
y_adl_cor_65_75_dic_logistic = gc.train_and_evaluate_logistic(
    X_cor_features_adl_65_75_dic,
    y_adl_cor_65_75_dic,
    "adl 65–75 (cor, logistic)",
    save_dir="model_outputs/adl/cor",
)
y_disease_cor_65_75_dic_logistic = gc.train_and_evaluate_logistic(
    X_cor_features_disease_65_75_dic,
    y_disease_cor_65_75_dic,
    "disease 65–75 (cor, logistic)",
    save_dir="model_outputs/adl/cor",
)

# -- Scandinavian
y_adl_scan_65_75_dic_logistic = gc.train_and_evaluate_logistic(
    X_scan_features_adl_65_75_dic,
    y_adl_scan_65_75_dic,
    "adl 65–75 (scan, logistic)",
    save_dir="model_outputs/adl/scan",
)
y_disease_scan_65_75_dic_logistic = gc.train_and_evaluate_logistic(
    X_scan_features_disease_65_75_dic,
    y_disease_scan_65_75_dic,
    "disease 65–75 (scan, logistic)",
    save_dir="model_outputs/adl/scan",
)


=== adl 65–75 (med, logistic) ===
Train Accuracy: 0.6493624772313297
Test Accuracy: 0.6529745042492918
Test AUC: 0.6777746798267625

=== disease 65–75 (med, logistic) ===
Train Accuracy: 0.5950212507589556
Test Accuracy: 0.6019830028328612
Test AUC: 0.6031005849388473

=== adl 65–75 (cor, logistic) ===
Train Accuracy: 0.6488579171506
Test Accuracy: 0.6469525959367946
Test AUC: 0.6515386427265925

=== disease 65–75 (cor, logistic) ===
Train Accuracy: 0.6116918312040264
Test Accuracy: 0.6126410835214446
Test AUC: 0.6016812356227407

=== adl 65–75 (scan, logistic) ===
Train Accuracy: 0.6515679442508711
Test Accuracy: 0.6345707656612529
Test AUC: 0.5937168220832214

=== disease 65–75 (scan, logistic) ===
Train Accuracy: 0.6485813837730214
Test Accuracy: 0.6577726218097448
Test AUC: 0.6788728632478632


In [12]:
# Logistic regression coefficients
# One coefficient table per welfare regime for the ADL logistic model. Each
# call receives that regime's fitted model, its named feature frame and its
# outcome tensor; `label` is used for the saved file name.

# -- Mediterranean
y_adl_med_65_75_logistic_coef_summary = gc.logistic_coef_summary(
    y_adl_med_65_75_dic_logistic,
    trt_features_med_adl_65_75_dic,
    y_adl_med_65_75_dic,
    label="y_adl_med_65_75_dic_logistic",   # this will be used for the file name
    save_dir="model_outputs/adl/med"
)

# -- Corporatist
# Fixed: this call used to pass the Mediterranean outcome (y_adl_med_65_75_dic),
# which does not belong to the corporatist feature rows. It must be paired
# with the corporatist outcome tensor.
y_adl_cor_65_75_logistic_coef_summary = gc.logistic_coef_summary(
    y_adl_cor_65_75_dic_logistic,
    trt_features_cor_adl_65_75_dic,
    y_adl_cor_65_75_dic,
    label="y_adl_cor_65_75_dic_logistic",   # this will be used for the file name
    save_dir="model_outputs/adl/cor"
)

# -- Scandinavian
y_adl_scan_65_75_logistic_coef_summary = gc.logistic_coef_summary(
    y_adl_scan_65_75_dic_logistic,
    trt_features_scan_adl_65_75_dic,
    y_adl_scan_65_75_dic,
    label="y_adl_scan_65_75_dic_logistic",   # this will be used for the file name
    save_dir="model_outputs/adl/scan"
)

# Show the three tables in regime order
print(y_adl_med_65_75_logistic_coef_summary)
print(y_adl_cor_65_75_logistic_coef_summary)
print(y_adl_scan_65_75_logistic_coef_summary)

  se = np.sqrt(np.diag(cov_logit))
  z_vals = coefs / se


✅ Saved coefficient summary to: model_outputs/adl/med\y_adl_med_65_75_dic_logistic_coef_summary.csv
✅ Saved coefficient summary to: model_outputs/adl/cor\y_adl_cor_65_75_dic_logistic_coef_summary.csv
✅ Saved coefficient summary to: model_outputs/adl/scan\y_adl_scan_65_75_dic_logistic_coef_summary.csv
                      feature      coef  odds_ratio     std_error  \
0                   Intercept -0.195203    0.822667           NaN   
1               cum_full_time  0.282315    1.326196  1.344957e-01   
2               cum_part_time  0.094487    1.099095  9.058003e-02   
3                 years_cohab -0.164042    0.848706  1.582144e-01   
4             years_not_cohab  0.164042    1.178264 -0.000000e+00   
5         years_with_children  0.175950    1.192379           NaN   
6      years_without_children -0.175950    0.838660           NaN   
7     dominant_work_full_time -0.144103    0.865799           NaN   
8     dominant_work_part_time  0.065975    1.068200           NaN   
9     do

  se = np.sqrt(np.diag(cov_logit))
  se = np.sqrt(np.diag(cov_logit))


In [13]:
############################
## Tuning for best models ##
############################
# Mediterranean sample. Each call returns a pair: the first element is kept as
# `*_model`, and the second (`*_best_models`) is passed as `base_models` when
# the final models are refit on the full data.
(
    y_adl_med_65_75_dic_model,
    y_adl_med_65_75_dic_best_models,
) = gc.train_and_evaluate_with_tuning(
    X_med_features_adl_65_75_dic,
    y_adl_med_65_75_dic,
    "adl 65–75 (med)",
    save_dir="model_outputs/adl/med",
)
(
    y_disease_med_65_75_dic_model,
    y_disease_med_65_75_dic_best_models,
) = gc.train_and_evaluate_with_tuning(
    X_med_features_disease_65_75_dic,
    y_disease_med_65_75_dic,
    "disease 65–75 (med)",
    save_dir="model_outputs/adl/med",
)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== adl 65–75 (med) ===
Train Accuracy: 0.6772920461445051
Test Accuracy: 0.6402266288951841
Test AUC: 0.6929459655383929


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== disease 65–75 (med) ===
Train Accuracy: 0.5768063145112325
Test Accuracy: 0.5821529745042493
Test AUC: 0.6088838964038376


In [14]:
# Corporatist sample: tuned models for the ADL outcome and the disease covariate.
# The second element of each returned pair is passed as `base_models` in the final refit.
(
    y_adl_cor_65_75_dic_model,
    y_adl_cor_65_75_dic_best_models,
) = gc.train_and_evaluate_with_tuning(
    X_cor_features_adl_65_75_dic,
    y_adl_cor_65_75_dic,
    "adl 65–75 (cor)",
    save_dir="model_outputs/adl/cor",
)
(
    y_disease_cor_65_75_dic_model,
    y_disease_cor_65_75_dic_best_models,
) = gc.train_and_evaluate_with_tuning(
    X_cor_features_disease_65_75_dic,
    y_disease_cor_65_75_dic,
    "disease 65–75 (cor)",
    save_dir="model_outputs/adl/cor",
)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== adl 65–75 (cor) ===
Train Accuracy: 0.7313201703445605
Test Accuracy: 0.6320541760722348
Test AUC: 0.6541341053629622


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== disease 65–75 (cor) ===
Train Accuracy: 0.6240805265195509
Test Accuracy: 0.6013544018058691
Test AUC: 0.605682550115018


In [15]:
# Scandinavian sample: tuned models for the ADL outcome and the disease covariate.
# The second element of each returned pair is passed as `base_models` in the final refit.
(
    y_adl_scan_65_75_dic_model,
    y_adl_scan_65_75_dic_best_models,
) = gc.train_and_evaluate_with_tuning(
    X_scan_features_adl_65_75_dic,
    y_adl_scan_65_75_dic,
    "adl 65–75 (scan)",
    save_dir="model_outputs/adl/scan",
)
(
    y_disease_scan_65_75_dic_model,
    y_disease_scan_65_75_dic_best_models,
) = gc.train_and_evaluate_with_tuning(
    X_scan_features_disease_65_75_dic,
    y_disease_scan_65_75_dic,
    "disease 65–75 (scan)",
    save_dir="model_outputs/adl/scan",
)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== adl 65–75 (scan) ===
Train Accuracy: 0.5878546540567446
Test Accuracy: 0.6125290023201856
Test AUC: 0.6279596300702253


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.



=== disease 65–75 (scan) ===
Train Accuracy: 0.8252862120457939
Test Accuracy: 0.6264501160092807
Test AUC: 0.6705573361823362


In [16]:
#############################
## Counterfactual analysis ##
#############################

import pandas as pd

# Fixed mergeid order in which the medoid sequences are stacked
custom_order = ['AT-148850-02', 'GR-336206-02', 'Bn-067069-02', 'IT-813620-01']

# Read the medoid sequences, sort by person and age, then select the medoids
# in the order given above (mergeid is restored as a regular column).
medoid_df = (
    pd.read_csv("Data/Processed/medoid_seq_data.csv")
    .sort_values(["mergeid", "t_age"])
    .set_index('mergeid')
    .loc[custom_order]
    .reset_index()
)

# Tensors and treatment features for the medoids, built with the Mediterranean
# feature-column lists (T comes from the data-prep cell).
N_medoid = medoid_df['mergeid'].nunique()
X_medoid_adl_65_75_dic = gc.convert_df_to_X(
    medoid_df, feature_cols_outcome_med_65_75, N_medoid, T
)
X_medoid_disease_65_75_dic = gc.convert_df_to_X(
    medoid_df, feature_cols_tv_covar_med_65_75, N_medoid, T
)
X_medoid_features_65_75_dic = gc.extract_treatment_features_from_medoid(
    X_medoid_adl_65_75_dic, feature_cols_outcome_med_65_75
)

In [58]:
#######################################################
## Counterfactual analysis : No cohabit and No child ##
#######################################################

import pandas as pd

# Fixed mergeid order in which the medoid sequences are stacked
custom_order = ['AT-148850-02', 'GR-336206-02', 'Bn-067069-02', 'IT-813620-01']

# Read the "no cohabit / no child" medoid sequences, sort by person and age,
# then select the medoids in the order given above.
medoid_no_cohabit_no_child_df = (
    pd.read_csv("Data/Processed/medoid_seq_data_no_cohabit_no_child.csv")
    .sort_values(["mergeid", "t_age"])
    .set_index('mergeid')
    .loc[custom_order]
    .reset_index()
)

# Tensors and treatment features for this scenario, built with the
# Mediterranean feature-column lists (T comes from the data-prep cell).
N_medoid_no_cohabit_no_child = medoid_no_cohabit_no_child_df['mergeid'].nunique()
X_medoid_no_cohabit_no_child_adl_65_75_dic = gc.convert_df_to_X(
    medoid_no_cohabit_no_child_df,
    feature_cols_outcome_med_65_75,
    N_medoid_no_cohabit_no_child,
    T,
)
X_medoid_no_cohabit_no_child_disease_65_75_dic = gc.convert_df_to_X(
    medoid_no_cohabit_no_child_df,
    feature_cols_tv_covar_med_65_75,
    N_medoid_no_cohabit_no_child,
    T,
)
X_medoid_no_cohabit_no_child_features_65_75_dic = gc.extract_treatment_features_from_medoid(
    X_medoid_no_cohabit_no_child_adl_65_75_dic,
    feature_cols_outcome_med_65_75,
)

In [61]:
#########################################################
## Counterfactual analysis : No cohabit and With child ##
#########################################################

import pandas as pd

# Fixed mergeid order in which the medoid sequences are stacked
custom_order = ['AT-148850-02', 'GR-336206-02', 'Bn-067069-02', 'IT-813620-01']

# Read the "no cohabit / with child" medoid sequences, sort by person and age,
# then select the medoids in the order given above.
medoid_no_cohabit_with_child_df = (
    pd.read_csv("Data/Processed/medoid_seq_data_no_cohabit_with_child.csv")
    .sort_values(["mergeid", "t_age"])
    .set_index('mergeid')
    .loc[custom_order]
    .reset_index()
)

# Tensors and treatment features for this scenario, built with the
# Mediterranean feature-column lists (T comes from the data-prep cell).
N_medoid_no_cohabit_with_child = medoid_no_cohabit_with_child_df['mergeid'].nunique()
X_medoid_no_cohabit_with_child_adl_65_75_dic = gc.convert_df_to_X(
    medoid_no_cohabit_with_child_df,
    feature_cols_outcome_med_65_75,
    N_medoid_no_cohabit_with_child,
    T,
)
X_medoid_no_cohabit_with_child_disease_65_75_dic = gc.convert_df_to_X(
    medoid_no_cohabit_with_child_df,
    feature_cols_tv_covar_med_65_75,
    N_medoid_no_cohabit_with_child,
    T,
)
X_medoid_no_cohabit_with_child_features_65_75_dic = gc.extract_treatment_features_from_medoid(
    X_medoid_no_cohabit_with_child_adl_65_75_dic,
    feature_cols_outcome_med_65_75,
)

In [49]:
###########################################
## Hypothetical intervention with medoid ##
###########################################

# For every regime and both feature matrices (disease, ADL), build the list of
# updated feature matrices from the observed features and the medoid treatment
# features.
# NOTE(review): cols_to_replace=30 presumably is the number of leading
# treatment columns swapped for the medoid values - confirm in
# gc.generate_updated_list.

# -- Mediterranean regime
X_features_disease_med_65_75_dic_updated_list = gc.generate_updated_list(
    X_med_features_disease_65_75_dic,
    X_medoid_features_65_75_dic,
    cols_to_replace=30,
)
X_features_adl_med_65_75_dic_updated_list = gc.generate_updated_list(
    X_med_features_adl_65_75_dic,
    X_medoid_features_65_75_dic,
    cols_to_replace=30,
)

# -- Corporatist regime
X_cor_features_disease_65_75_dic_updated_list = gc.generate_updated_list(
    X_cor_features_disease_65_75_dic,
    X_medoid_features_65_75_dic,
    cols_to_replace=30,
)
X_cor_features_adl_65_75_dic_updated_list = gc.generate_updated_list(
    X_cor_features_adl_65_75_dic,
    X_medoid_features_65_75_dic,
    cols_to_replace=30,
)

# -- Scandinavian regime
X_scan_features_disease_65_75_dic_updated_list = gc.generate_updated_list(
    X_scan_features_disease_65_75_dic,
    X_medoid_features_65_75_dic,
    cols_to_replace=30,
)
X_scan_features_adl_65_75_dic_updated_list = gc.generate_updated_list(
    X_scan_features_adl_65_75_dic,
    X_medoid_features_65_75_dic,
    cols_to_replace=30,
)

 

In [None]:
#####################################################################
## Hypothetical intervention with medoid : No cohabit and No child ##
#####################################################################

# Same construction as the baseline medoid intervention, but using the
# "no cohabit / no child" medoid treatment features.
# NOTE(review): cols_to_replace=30 presumably is the number of leading
# treatment columns swapped for the medoid values - confirm in
# gc.generate_updated_list.

# -- Mediterranean regime
X_features_no_cohabit_no_child_disease_med_65_75_dic_updated_list = gc.generate_updated_list(
    X_med_features_disease_65_75_dic,
    X_medoid_no_cohabit_no_child_features_65_75_dic,
    cols_to_replace=30,
)
X_features_no_cohabit_no_child_adl_med_65_75_dic_updated_list = gc.generate_updated_list(
    X_med_features_adl_65_75_dic,
    X_medoid_no_cohabit_no_child_features_65_75_dic,
    cols_to_replace=30,
)

# -- Corporatist regime
X_cor_features_no_cohabit_no_child_disease_65_75_dic_updated_list = gc.generate_updated_list(
    X_cor_features_disease_65_75_dic,
    X_medoid_no_cohabit_no_child_features_65_75_dic,
    cols_to_replace=30,
)
X_cor_features_no_cohabit_no_child_adl_65_75_dic_updated_list = gc.generate_updated_list(
    X_cor_features_adl_65_75_dic,
    X_medoid_no_cohabit_no_child_features_65_75_dic,
    cols_to_replace=30,
)

# -- Scandinavian regime
X_scan_features_no_cohabit_no_child_disease_65_75_dic_updated_list = gc.generate_updated_list(
    X_scan_features_disease_65_75_dic,
    X_medoid_no_cohabit_no_child_features_65_75_dic,
    cols_to_replace=30,
)
X_scan_features_no_cohabit_no_child_adl_65_75_dic_updated_list = gc.generate_updated_list(
    X_scan_features_adl_65_75_dic,
    X_medoid_no_cohabit_no_child_features_65_75_dic,
    cols_to_replace=30,
)


In [None]:
#######################################################################
## Hypothetical intervention with medoid : No cohabit and With child ##
#######################################################################

# Same construction as the baseline medoid intervention, but using the
# "no cohabit / with child" medoid treatment features.
# NOTE(review): cols_to_replace=30 presumably is the number of leading
# treatment columns swapped for the medoid values - confirm in
# gc.generate_updated_list.

# -- Mediterranean regime
X_features_no_cohabit_with_child_disease_med_65_75_dic_updated_list = gc.generate_updated_list(
    X_med_features_disease_65_75_dic,
    X_medoid_no_cohabit_with_child_features_65_75_dic,
    cols_to_replace=30,
)
X_features_no_cohabit_with_child_adl_med_65_75_dic_updated_list = gc.generate_updated_list(
    X_med_features_adl_65_75_dic,
    X_medoid_no_cohabit_with_child_features_65_75_dic,
    cols_to_replace=30,
)

# -- Corporatist regime
X_cor_features_no_cohabit_with_child_disease_65_75_dic_updated_list = gc.generate_updated_list(
    X_cor_features_disease_65_75_dic,
    X_medoid_no_cohabit_with_child_features_65_75_dic,
    cols_to_replace=30,
)
X_cor_features_no_cohabit_with_child_adl_65_75_dic_updated_list = gc.generate_updated_list(
    X_cor_features_adl_65_75_dic,
    X_medoid_no_cohabit_with_child_features_65_75_dic,
    cols_to_replace=30,
)

# -- Scandinavian regime
X_scan_features_no_cohabit_with_child_disease_65_75_dic_updated_list = gc.generate_updated_list(
    X_scan_features_disease_65_75_dic,
    X_medoid_no_cohabit_with_child_features_65_75_dic,
    cols_to_replace=30,
)
X_scan_features_no_cohabit_with_child_adl_65_75_dic_updated_list = gc.generate_updated_list(
    X_scan_features_adl_65_75_dic,
    X_medoid_no_cohabit_with_child_features_65_75_dic,
    cols_to_replace=30,
)


In [20]:
###################################################
## Refit the final models with all original data ##
###################################################

# Per regime, in this order:
#   1. refit the tuned base models for the ADL outcome,
#   2. refit the tuned base models for the disease covariate,
#   3. fit the ADL super learner on top of the refitted ADL base models,
#   4. fit the disease super learner on top of the refitted disease base models.
# save=True is passed to every call.

# ---------------------------------------------------------------- Mediterranean
y_adl_med_final_models = gc.refit_and_save_final_models(
    X=X_med_features_adl_65_75_dic, y=y_adl_med_65_75_dic,
    base_models=y_adl_med_65_75_dic_best_models,
    label="adl 65–75 (med)", save=True,
)
y_disease_med_final_models = gc.refit_and_save_final_models(
    X=X_med_features_disease_65_75_dic, y=y_disease_med_65_75_dic,
    base_models=y_disease_med_65_75_dic_best_models,
    label="disease 65–75 (med)", save=True,
)
y_adl_med_super_learner = gc.refit_and_save_super_learner(
    X=X_med_features_adl_65_75_dic, y=y_adl_med_65_75_dic,
    base_models=y_adl_med_final_models,
    label="adl 65–75 (med)", save=True,
)
y_disease_med_super_learner = gc.refit_and_save_super_learner(
    X=X_med_features_disease_65_75_dic, y=y_disease_med_65_75_dic,
    base_models=y_disease_med_final_models,
    label="disease 65–75 (med)", save=True,
)

# ------------------------------------------------------------------ Corporatist
y_adl_cor_final_models = gc.refit_and_save_final_models(
    X=X_cor_features_adl_65_75_dic, y=y_adl_cor_65_75_dic,
    base_models=y_adl_cor_65_75_dic_best_models,
    label="adl 65–75 (cor)", save=True,
)
y_disease_cor_final_models = gc.refit_and_save_final_models(
    X=X_cor_features_disease_65_75_dic, y=y_disease_cor_65_75_dic,
    base_models=y_disease_cor_65_75_dic_best_models,
    label="disease 65–75 (cor)", save=True,
)
y_adl_cor_super_learner = gc.refit_and_save_super_learner(
    X=X_cor_features_adl_65_75_dic, y=y_adl_cor_65_75_dic,
    base_models=y_adl_cor_final_models,
    label="adl 65–75 (cor)", save=True,
)
y_disease_cor_super_learner = gc.refit_and_save_super_learner(
    X=X_cor_features_disease_65_75_dic, y=y_disease_cor_65_75_dic,
    base_models=y_disease_cor_final_models,
    label="disease 65–75 (cor)", save=True,
)

# ----------------------------------------------------------------- Scandinavian
y_adl_scan_final_models = gc.refit_and_save_final_models(
    X=X_scan_features_adl_65_75_dic, y=y_adl_scan_65_75_dic,
    base_models=y_adl_scan_65_75_dic_best_models,
    label="adl 65–75 (scan)", save=True,
)
y_disease_scan_final_models = gc.refit_and_save_final_models(
    X=X_scan_features_disease_65_75_dic, y=y_disease_scan_65_75_dic,
    base_models=y_disease_scan_65_75_dic_best_models,
    label="disease 65–75 (scan)", save=True,
)
y_adl_scan_super_learner = gc.refit_and_save_super_learner(
    X=X_scan_features_adl_65_75_dic, y=y_adl_scan_65_75_dic,
    base_models=y_adl_scan_final_models,
    label="adl 65–75 (scan)", save=True,
)
y_disease_scan_super_learner = gc.refit_and_save_super_learner(
    X=X_scan_features_disease_65_75_dic, y=y_disease_scan_65_75_dic,
    base_models=y_disease_scan_final_models,
    label="disease 65–75 (scan)", save=True,
)



🔁 Refitting LASSO on full data...
✅ Saved LASSO refitted model to: intermediate\adl_65-75_(med)_lasso_final_model.pkl

🔁 Refitting RF on full data...


  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


✅ Saved RF refitted model to: intermediate\adl_65-75_(med)_rf_final_model.pkl

🔁 Refitting XGB on full data...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved XGB refitted model to: intermediate\adl_65-75_(med)_xgb_final_model.pkl

🔁 Refitting LASSO on full data...
✅ Saved LASSO refitted model to: intermediate\disease_65-75_(med)_lasso_final_model.pkl

🔁 Refitting RF on full data...


  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


✅ Saved RF refitted model to: intermediate\disease_65-75_(med)_rf_final_model.pkl

🔁 Refitting XGB on full data...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved XGB refitted model to: intermediate\disease_65-75_(med)_xgb_final_model.pkl

🔁 Fitting SUPER LEARNER for adl 65–75 (med)...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved SUPER LEARNER to: intermediate\adl_65-75_(med)_super_learner.pkl

🔁 Fitting SUPER LEARNER for disease 65–75 (med)...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved SUPER LEARNER to: intermediate\disease_65-75_(med)_super_learner.pkl

🔁 Refitting LASSO on full data...
✅ Saved LASSO refitted model to: intermediate\adl_65-75_(cor)_lasso_final_model.pkl

🔁 Refitting RF on full data...


  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


✅ Saved RF refitted model to: intermediate\adl_65-75_(cor)_rf_final_model.pkl

🔁 Refitting XGB on full data...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved XGB refitted model to: intermediate\adl_65-75_(cor)_xgb_final_model.pkl

🔁 Refitting LASSO on full data...


  y = column_or_1d(y, warn=True)


✅ Saved LASSO refitted model to: intermediate\disease_65-75_(cor)_lasso_final_model.pkl

🔁 Refitting RF on full data...


  return fit_method(estimator, *args, **kwargs)


✅ Saved RF refitted model to: intermediate\disease_65-75_(cor)_rf_final_model.pkl

🔁 Refitting XGB on full data...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved XGB refitted model to: intermediate\disease_65-75_(cor)_xgb_final_model.pkl

🔁 Fitting SUPER LEARNER for adl 65–75 (cor)...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved SUPER LEARNER to: intermediate\adl_65-75_(cor)_super_learner.pkl

🔁 Fitting SUPER LEARNER for disease 65–75 (cor)...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved SUPER LEARNER to: intermediate\disease_65-75_(cor)_super_learner.pkl

🔁 Refitting LASSO on full data...
✅ Saved LASSO refitted model to: intermediate\adl_65-75_(scan)_lasso_final_model.pkl

🔁 Refitting RF on full data...


  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


✅ Saved RF refitted model to: intermediate\adl_65-75_(scan)_rf_final_model.pkl

🔁 Refitting XGB on full data...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved XGB refitted model to: intermediate\adl_65-75_(scan)_xgb_final_model.pkl

🔁 Refitting LASSO on full data...


  y = column_or_1d(y, warn=True)


✅ Saved LASSO refitted model to: intermediate\disease_65-75_(scan)_lasso_final_model.pkl

🔁 Refitting RF on full data...


  return fit_method(estimator, *args, **kwargs)


✅ Saved RF refitted model to: intermediate\disease_65-75_(scan)_rf_final_model.pkl

🔁 Refitting XGB on full data...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved XGB refitted model to: intermediate\disease_65-75_(scan)_xgb_final_model.pkl

🔁 Fitting SUPER LEARNER for adl 65–75 (scan)...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved SUPER LEARNER to: intermediate\adl_65-75_(scan)_super_learner.pkl

🔁 Fitting SUPER LEARNER for disease 65–75 (scan)...


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Saved SUPER LEARNER to: intermediate\disease_65-75_(scan)_super_learner.pkl


In [44]:
from sklearn.utils import resample

# Feature names: wrap each regime's ADL 65–75 outcome feature array together with its
# column names via gc.create_feature_df (Mediterranean, Corporatist, Scandinavian, in that order).
# The resulting objects are later read through `.columns` to recover the feature names.
(
    trt_features_med_adl_65_75_dic,
    trt_features_cor_adl_65_75_dic,
    trt_features_scan_adl_65_75_dic,
) = (
    gc.create_feature_df(feature_array, column_names, "adl 65–75")
    for feature_array, column_names in (
        (X_med_features_adl_65_75_dic, feature_cols_outcome_med_65_75),
        (X_cor_features_adl_65_75_dic, feature_cols_outcome_cor_65_75),
        (X_scan_features_adl_65_75_dic, feature_cols_outcome_scan_65_75),
    )
)


def run_mc_bootstrap_once_regime_fixed_model(seed):
    """Run one bootstrap replicate of the ADL 65–75 medoid counterfactuals with fixed models.

    For each welfare regime (Mediterranean, Scandinavian, Corporatist) the outcome
    and disease feature arrays are resampled with replacement, passed through
    ``gc.generate_updated_list`` together with ``X_medoid_features_65_75_dic``,
    and fed to ``gc.counterfactual_y_under_X_medoid_features``. No model is fitted
    here: the disease and ADL super learners are read from the notebook's global
    scope, as are the feature arrays, outcome arrays and feature-name frames.

    Parameters
    ----------
    seed : int
        Replicate seed. The regimes are resampled with ``seed + 1``
        (Mediterranean), ``seed + 2`` (Scandinavian) and ``seed + 3`` (Corporatist).

    Returns
    -------
    tuple of (dict, dict, dict)
        ``(ate_result, stand_ate_result, risk_ratio_result)``. Each dict starts
        with ``{"seed": seed}`` and is then extended with the output of
        ``gc.flatten_ates_dict`` for "Mediterranean", "Corporatist" and
        "Scandinavian", in that order.
    """

    # Step 1: bootstrap rows
    def draw_rows(features, targets, random_state):
        # Sample row positions with replacement; the sample has as many rows as the input.
        n_rows = len(features)
        picked = resample(np.arange(n_rows), replace=True, n_samples=n_rows, random_state=random_state)
        return features[picked], targets[picked]

    # The outcome and disease arrays of one regime share a seed, so arrays of
    # equal length receive the same row draw.
    med_seed, scan_seed, cor_seed = seed + 1, seed + 2, seed + 3

    adl_X_med, adl_y_med = draw_rows(X_med_features_adl_65_75_dic, y_adl_med_65_75_dic, med_seed)
    dis_X_med, _ = draw_rows(X_med_features_disease_65_75_dic, y_disease_med_65_75_dic, med_seed)

    adl_X_scan, adl_y_scan = draw_rows(X_scan_features_adl_65_75_dic, y_adl_scan_65_75_dic, scan_seed)
    dis_X_scan, _ = draw_rows(X_scan_features_disease_65_75_dic, y_disease_scan_65_75_dic, scan_seed)

    adl_X_cor, adl_y_cor = draw_rows(X_cor_features_adl_65_75_dic, y_adl_cor_65_75_dic, cor_seed)
    dis_X_cor, _ = draw_rows(X_cor_features_disease_65_75_dic, y_disease_cor_65_75_dic, cor_seed)

    # Step 2: combine the resampled rows with the medoid features
    def swap_in_medoids(rows):
        # NOTE(review): cols_to_replace=30 is presumably the number of columns taken
        # from the medoid array; confirm against gc.generate_updated_list.
        return gc.generate_updated_list(rows, X_medoid_features_65_75_dic, cols_to_replace=30)

    dis_med_lists = swap_in_medoids(dis_X_med)
    adl_med_lists = swap_in_medoids(adl_X_med)
    dis_scan_lists = swap_in_medoids(dis_X_scan)
    adl_scan_lists = swap_in_medoids(adl_X_scan)
    dis_cor_lists = swap_in_medoids(dis_X_cor)
    adl_cor_lists = swap_in_medoids(adl_X_cor)

    # Step 3: counterfactual estimates from the already-fitted super learners
    def estimate_effects(disease_model, outcome_model, targets, disease_lists, outcome_lists, feature_frame):
        # Only the last three of the five returned values are used here.
        _, _, ates, stand_ates, risk_ratios = gc.counterfactual_y_under_X_medoid_features(
            tv_cov_model=disease_model,
            y_model=outcome_model,
            y_dic=targets,
            X_features_disease_65_75_dic_updated_list=disease_lists,
            X_features_outcome_65_75_dic_updated_list=outcome_lists,
            tv_cov_name="dt_n_years_disease_dic",
            outcome_feature_names=feature_frame.columns.tolist(),
        )
        return ates, stand_ates, risk_ratios

    med_effects = estimate_effects(
        y_disease_med_super_learner, y_adl_med_super_learner, adl_y_med,
        dis_med_lists, adl_med_lists, trt_features_med_adl_65_75_dic,
    )
    cor_effects = estimate_effects(
        y_disease_cor_super_learner, y_adl_cor_super_learner, adl_y_cor,
        dis_cor_lists, adl_cor_lists, trt_features_cor_adl_65_75_dic,
    )
    scan_effects = estimate_effects(
        y_disease_scan_super_learner, y_adl_scan_super_learner, adl_y_scan,
        dis_scan_lists, adl_scan_lists, trt_features_scan_adl_65_75_dic,
    )

    # Step 4: one flat dict per effect measure (0 = ATE, 1 = standardized ATE, 2 = risk ratio)
    regime_effects = (
        ("Mediterranean", med_effects),
        ("Corporatist", cor_effects),
        ("Scandinavian", scan_effects),
    )

    def tabulate(position):
        row = {"seed": seed}
        for regime, effects in regime_effects:
            row.update(gc.flatten_ates_dict(effects[position], regime))
        return row

    return tabulate(0), tabulate(1), tabulate(2)



adl 65–75: Feature dataframe shape = (4706, 48)
adl 65–75: Feature dataframe shape = (7381, 48)
adl 65–75: Feature dataframe shape = (2871, 48)


In [51]:
from sklearn.utils import resample

# Feature names: wrap each regime's ADL 65–75 outcome feature array together with its
# column names via gc.create_feature_df (Mediterranean, Corporatist, Scandinavian, in that order).
# The resulting objects are later read through `.columns` to recover the feature names.
(
    trt_features_med_adl_65_75_dic,
    trt_features_cor_adl_65_75_dic,
    trt_features_scan_adl_65_75_dic,
) = (
    gc.create_feature_df(feature_array, column_names, "adl 65–75")
    for feature_array, column_names in (
        (X_med_features_adl_65_75_dic, feature_cols_outcome_med_65_75),
        (X_cor_features_adl_65_75_dic, feature_cols_outcome_cor_65_75),
        (X_scan_features_adl_65_75_dic, feature_cols_outcome_scan_65_75),
    )
)


def run_mc_bootstrap_once_regime_fixed_model_no_cohabit_no_child(seed):
    """Run one bootstrap replicate using the "no cohabit, no child" medoid features.

    For each welfare regime (Mediterranean, Scandinavian, Corporatist) the outcome
    and disease feature arrays are resampled with replacement, passed through
    ``gc.generate_updated_list`` together with
    ``X_medoid_no_cohabit_no_child_features_65_75_dic``, and fed to
    ``gc.counterfactual_y_under_X_medoid_features``. No model is fitted here: the
    disease and ADL super learners are read from the notebook's global scope, as
    are the feature arrays, outcome arrays and feature-name frames.

    Parameters
    ----------
    seed : int
        Replicate seed. The regimes are resampled with ``seed + 1``
        (Mediterranean), ``seed + 2`` (Scandinavian) and ``seed + 3`` (Corporatist).

    Returns
    -------
    tuple of (dict, dict, dict)
        ``(ate_result, stand_ate_result, risk_ratio_result)``. Each dict starts
        with ``{"seed": seed}`` and is then extended with the output of
        ``gc.flatten_ates_dict`` for "Mediterranean", "Corporatist" and
        "Scandinavian", in that order.
    """

    # Step 1: bootstrap rows
    def draw_rows(features, targets, random_state):
        # Sample row positions with replacement; the sample has as many rows as the input.
        n_rows = len(features)
        picked = resample(np.arange(n_rows), replace=True, n_samples=n_rows, random_state=random_state)
        return features[picked], targets[picked]

    # The outcome and disease arrays of one regime share a seed, so arrays of
    # equal length receive the same row draw.
    med_seed, scan_seed, cor_seed = seed + 1, seed + 2, seed + 3

    adl_X_med, adl_y_med = draw_rows(X_med_features_adl_65_75_dic, y_adl_med_65_75_dic, med_seed)
    dis_X_med, _ = draw_rows(X_med_features_disease_65_75_dic, y_disease_med_65_75_dic, med_seed)

    adl_X_scan, adl_y_scan = draw_rows(X_scan_features_adl_65_75_dic, y_adl_scan_65_75_dic, scan_seed)
    dis_X_scan, _ = draw_rows(X_scan_features_disease_65_75_dic, y_disease_scan_65_75_dic, scan_seed)

    adl_X_cor, adl_y_cor = draw_rows(X_cor_features_adl_65_75_dic, y_adl_cor_65_75_dic, cor_seed)
    dis_X_cor, _ = draw_rows(X_cor_features_disease_65_75_dic, y_disease_cor_65_75_dic, cor_seed)

    # Step 2: combine the resampled rows with the medoid features
    def swap_in_medoids(rows):
        # NOTE(review): cols_to_replace=30 is presumably the number of columns taken
        # from the medoid array; confirm against gc.generate_updated_list.
        return gc.generate_updated_list(
            rows, X_medoid_no_cohabit_no_child_features_65_75_dic, cols_to_replace=30
        )

    dis_med_lists = swap_in_medoids(dis_X_med)
    adl_med_lists = swap_in_medoids(adl_X_med)
    dis_scan_lists = swap_in_medoids(dis_X_scan)
    adl_scan_lists = swap_in_medoids(adl_X_scan)
    dis_cor_lists = swap_in_medoids(dis_X_cor)
    adl_cor_lists = swap_in_medoids(adl_X_cor)

    # Step 3: counterfactual estimates from the already-fitted super learners
    def estimate_effects(disease_model, outcome_model, targets, disease_lists, outcome_lists, feature_frame):
        # Only the last three of the five returned values are used here.
        _, _, ates, stand_ates, risk_ratios = gc.counterfactual_y_under_X_medoid_features(
            tv_cov_model=disease_model,
            y_model=outcome_model,
            y_dic=targets,
            X_features_disease_65_75_dic_updated_list=disease_lists,
            X_features_outcome_65_75_dic_updated_list=outcome_lists,
            tv_cov_name="dt_n_years_disease_dic",
            outcome_feature_names=feature_frame.columns.tolist(),
        )
        return ates, stand_ates, risk_ratios

    med_effects = estimate_effects(
        y_disease_med_super_learner, y_adl_med_super_learner, adl_y_med,
        dis_med_lists, adl_med_lists, trt_features_med_adl_65_75_dic,
    )
    cor_effects = estimate_effects(
        y_disease_cor_super_learner, y_adl_cor_super_learner, adl_y_cor,
        dis_cor_lists, adl_cor_lists, trt_features_cor_adl_65_75_dic,
    )
    scan_effects = estimate_effects(
        y_disease_scan_super_learner, y_adl_scan_super_learner, adl_y_scan,
        dis_scan_lists, adl_scan_lists, trt_features_scan_adl_65_75_dic,
    )

    # Step 4: one flat dict per effect measure (0 = ATE, 1 = standardized ATE, 2 = risk ratio)
    regime_effects = (
        ("Mediterranean", med_effects),
        ("Corporatist", cor_effects),
        ("Scandinavian", scan_effects),
    )

    def tabulate(position):
        row = {"seed": seed}
        for regime, effects in regime_effects:
            row.update(gc.flatten_ates_dict(effects[position], regime))
        return row

    return tabulate(0), tabulate(1), tabulate(2)



adl 65–75: Feature dataframe shape = (4706, 48)
adl 65–75: Feature dataframe shape = (7381, 48)
adl 65–75: Feature dataframe shape = (2871, 48)


In [64]:
from sklearn.utils import resample

# Feature names: wrap each regime's ADL 65–75 outcome feature array together with its
# column names via gc.create_feature_df (Mediterranean, Corporatist, Scandinavian, in that order).
# The resulting objects are later read through `.columns` to recover the feature names.
(
    trt_features_med_adl_65_75_dic,
    trt_features_cor_adl_65_75_dic,
    trt_features_scan_adl_65_75_dic,
) = (
    gc.create_feature_df(feature_array, column_names, "adl 65–75")
    for feature_array, column_names in (
        (X_med_features_adl_65_75_dic, feature_cols_outcome_med_65_75),
        (X_cor_features_adl_65_75_dic, feature_cols_outcome_cor_65_75),
        (X_scan_features_adl_65_75_dic, feature_cols_outcome_scan_65_75),
    )
)


def run_mc_bootstrap_once_regime_fixed_model_no_cohabit_with_child(seed):
    """Run one bootstrap replicate using the "no cohabit, with child" medoid features.

    For each welfare regime (Mediterranean, Scandinavian, Corporatist) the outcome
    and disease feature arrays are resampled with replacement, passed through
    ``gc.generate_updated_list`` together with
    ``X_medoid_no_cohabit_with_child_features_65_75_dic``, and fed to
    ``gc.counterfactual_y_under_X_medoid_features``. No model is fitted here: the
    disease and ADL super learners are read from the notebook's global scope, as
    are the feature arrays, outcome arrays and feature-name frames.

    Parameters
    ----------
    seed : int
        Replicate seed. The regimes are resampled with ``seed + 1``
        (Mediterranean), ``seed + 2`` (Scandinavian) and ``seed + 3`` (Corporatist).

    Returns
    -------
    tuple of (dict, dict, dict)
        ``(ate_result, stand_ate_result, risk_ratio_result)``. Each dict starts
        with ``{"seed": seed}`` and is then extended with the output of
        ``gc.flatten_ates_dict`` for "Mediterranean", "Corporatist" and
        "Scandinavian", in that order.
    """

    # Step 1: bootstrap rows
    def draw_rows(features, targets, random_state):
        # Sample row positions with replacement; the sample has as many rows as the input.
        n_rows = len(features)
        picked = resample(np.arange(n_rows), replace=True, n_samples=n_rows, random_state=random_state)
        return features[picked], targets[picked]

    # The outcome and disease arrays of one regime share a seed, so arrays of
    # equal length receive the same row draw.
    med_seed, scan_seed, cor_seed = seed + 1, seed + 2, seed + 3

    adl_X_med, adl_y_med = draw_rows(X_med_features_adl_65_75_dic, y_adl_med_65_75_dic, med_seed)
    dis_X_med, _ = draw_rows(X_med_features_disease_65_75_dic, y_disease_med_65_75_dic, med_seed)

    adl_X_scan, adl_y_scan = draw_rows(X_scan_features_adl_65_75_dic, y_adl_scan_65_75_dic, scan_seed)
    dis_X_scan, _ = draw_rows(X_scan_features_disease_65_75_dic, y_disease_scan_65_75_dic, scan_seed)

    adl_X_cor, adl_y_cor = draw_rows(X_cor_features_adl_65_75_dic, y_adl_cor_65_75_dic, cor_seed)
    dis_X_cor, _ = draw_rows(X_cor_features_disease_65_75_dic, y_disease_cor_65_75_dic, cor_seed)

    # Step 2: combine the resampled rows with the medoid features
    def swap_in_medoids(rows):
        # NOTE(review): cols_to_replace=30 is presumably the number of columns taken
        # from the medoid array; confirm against gc.generate_updated_list.
        return gc.generate_updated_list(
            rows, X_medoid_no_cohabit_with_child_features_65_75_dic, cols_to_replace=30
        )

    dis_med_lists = swap_in_medoids(dis_X_med)
    adl_med_lists = swap_in_medoids(adl_X_med)
    dis_scan_lists = swap_in_medoids(dis_X_scan)
    adl_scan_lists = swap_in_medoids(adl_X_scan)
    dis_cor_lists = swap_in_medoids(dis_X_cor)
    adl_cor_lists = swap_in_medoids(adl_X_cor)

    # Step 3: counterfactual estimates from the already-fitted super learners
    def estimate_effects(disease_model, outcome_model, targets, disease_lists, outcome_lists, feature_frame):
        # Only the last three of the five returned values are used here.
        _, _, ates, stand_ates, risk_ratios = gc.counterfactual_y_under_X_medoid_features(
            tv_cov_model=disease_model,
            y_model=outcome_model,
            y_dic=targets,
            X_features_disease_65_75_dic_updated_list=disease_lists,
            X_features_outcome_65_75_dic_updated_list=outcome_lists,
            tv_cov_name="dt_n_years_disease_dic",
            outcome_feature_names=feature_frame.columns.tolist(),
        )
        return ates, stand_ates, risk_ratios

    med_effects = estimate_effects(
        y_disease_med_super_learner, y_adl_med_super_learner, adl_y_med,
        dis_med_lists, adl_med_lists, trt_features_med_adl_65_75_dic,
    )
    cor_effects = estimate_effects(
        y_disease_cor_super_learner, y_adl_cor_super_learner, adl_y_cor,
        dis_cor_lists, adl_cor_lists, trt_features_cor_adl_65_75_dic,
    )
    scan_effects = estimate_effects(
        y_disease_scan_super_learner, y_adl_scan_super_learner, adl_y_scan,
        dis_scan_lists, adl_scan_lists, trt_features_scan_adl_65_75_dic,
    )

    # Step 4: one flat dict per effect measure (0 = ATE, 1 = standardized ATE, 2 = risk ratio)
    regime_effects = (
        ("Mediterranean", med_effects),
        ("Corporatist", cor_effects),
        ("Scandinavian", scan_effects),
    )

    def tabulate(position):
        row = {"seed": seed}
        for regime, effects in regime_effects:
            row.update(gc.flatten_ates_dict(effects[position], regime))
        return row

    return tabulate(0), tabulate(1), tabulate(2)



adl 65–75: Feature dataframe shape = (4706, 48)
adl 65–75: Feature dataframe shape = (7381, 48)
adl 65–75: Feature dataframe shape = (2871, 48)


In [46]:
# Run the replicate function serially for seeds 42, 43 and 44.
results_adl_65_75_dic = list(map(run_mc_bootstrap_once_regime_fixed_model, range(42, 45)))
# One row per replicate; each replicate is the 3-tuple returned by the function.
df_results_adl_65_75_dic = pd.DataFrame(results_adl_65_75_dic)


--- Medoid 0 ---
  Predicted dt_n_years_disease_dic: mean=0.001
 Potential outcome: mean=0.109
 Relative average treatment effects: -0.258: Negative sign meaning less risk of health problem

--- Medoid 1 ---
  Predicted dt_n_years_disease_dic: mean=0.005
 Potential outcome: mean=0.161
 Relative average treatment effects: 0.094: Negative sign meaning less risk of health problem

--- Medoid 2 ---
  Predicted dt_n_years_disease_dic: mean=0.003
 Potential outcome: mean=0.130
 Relative average treatment effects: -0.120: Negative sign meaning less risk of health problem

--- Medoid 3 ---
  Predicted dt_n_years_disease_dic: mean=0.007
 Potential outcome: mean=0.151
 Relative average treatment effects: 0.022: Negative sign meaning less risk of health problem

--- Medoid 0 ---
  Predicted dt_n_years_disease_dic: mean=0.000
 Potential outcome: mean=0.115
 Relative average treatment effects: -0.287: Negative sign meaning less risk of health problem

--- Medoid 1 ---
  Predicted dt_n_years_diseas

In [66]:
# Run the "no cohabit, no child" replicate function serially for the single seed 42.
results_no_cohabit_no_child_adl_65_75_dic = list(
    map(run_mc_bootstrap_once_regime_fixed_model_no_cohabit_no_child, range(42, 43))
)
# One row per replicate; each replicate is the 3-tuple returned by the function.
df_results_no_cohabit_no_child_adl_65_75_dic = pd.DataFrame(results_no_cohabit_no_child_adl_65_75_dic)


--- Medoid 0 ---
  Predicted dt_n_years_disease_dic: mean=0.001
 Potential outcome: mean=0.143
 Relative average treatment effects: -0.031: Negative sign meaning less risk of health problem

--- Medoid 1 ---
  Predicted dt_n_years_disease_dic: mean=0.001
 Potential outcome: mean=0.184
 Relative average treatment effects: 0.246: Negative sign meaning less risk of health problem

--- Medoid 2 ---
  Predicted dt_n_years_disease_dic: mean=0.001
 Potential outcome: mean=0.144
 Relative average treatment effects: -0.023: Negative sign meaning less risk of health problem

--- Medoid 3 ---
  Predicted dt_n_years_disease_dic: mean=0.001
 Potential outcome: mean=0.166
 Relative average treatment effects: 0.129: Negative sign meaning less risk of health problem

--- Medoid 0 ---
  Predicted dt_n_years_disease_dic: mean=0.002
 Potential outcome: mean=0.140
 Relative average treatment effects: -0.134: Negative sign meaning less risk of health problem

--- Medoid 1 ---
  Predicted dt_n_years_diseas

In [None]:
# Run the "no cohabit, with child" replicate function serially for the single seed 42.
results_no_cohabit_with_child_adl_65_75_dic = list(
    map(run_mc_bootstrap_once_regime_fixed_model_no_cohabit_with_child, range(42, 43))
)
# One row per replicate; each replicate is the 3-tuple returned by the function.
df_results_no_cohabit_with_child_adl_65_75_dic = pd.DataFrame(results_no_cohabit_with_child_adl_65_75_dic)


--- Medoid 0 ---
  Predicted dt_n_years_disease_dic: mean=0.003
 Potential outcome: mean=0.227
 Relative average treatment effects: 0.542: Negative sign meaning less risk of health problem

--- Medoid 1 ---
  Predicted dt_n_years_disease_dic: mean=0.007
 Potential outcome: mean=0.284
 Relative average treatment effects: 0.925: Negative sign meaning less risk of health problem

--- Medoid 2 ---
  Predicted dt_n_years_disease_dic: mean=0.003
 Potential outcome: mean=0.229
 Relative average treatment effects: 0.554: Negative sign meaning less risk of health problem

--- Medoid 3 ---
  Predicted dt_n_years_disease_dic: mean=0.007
 Potential outcome: mean=0.260
 Relative average treatment effects: 0.765: Negative sign meaning less risk of health problem

--- Medoid 0 ---
  Predicted dt_n_years_disease_dic: mean=0.017
 Potential outcome: mean=0.165
 Relative average treatment effects: 0.016: Negative sign meaning less risk of health problem

--- Medoid 1 ---
  Predicted dt_n_years_disease_d

In [24]:
# Fixed models
# Separate outcome models
#
# Runs the bootstrap replicates of run_mc_bootstrap_once_regime_fixed_model in
# parallel and reports the elapsed time.

from joblib import Parallel, delayed
import pandas as pd
import time

# perf_counter is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the run.
start = time.perf_counter()

# Define number of bootstrap replicates and parallel jobs
n_bootstrap = 1000  # or any number you like
n_jobs = 8
batch_size = 15  # tasks dispatched to a worker at once

# Run bootstrap in parallel: one task per seed, seeds 42 .. 42 + n_bootstrap - 1
bootstrap_results_adl_65_75_dic = Parallel(n_jobs=n_jobs, batch_size=batch_size)(
    delayed(run_mc_bootstrap_once_regime_fixed_model)(seed) for seed in range(42, 42 + n_bootstrap)
)

end = time.perf_counter()
# Report the configured replicate count rather than a hard-coded number
print(f"Finished {n_bootstrap} bootstraps in {(end - start)/60:.2f} minutes")

# Convert to DataFrame (one row per replicate; each replicate is a 3-tuple of result dicts)
df_bootstrap_results_adl_65_75_dic = pd.DataFrame(bootstrap_results_adl_65_75_dic)


Finished 1000 bootstraps in 6.28 minutes


In [29]:
# Each bootstrap replicate is a (raw ATE, standardized ATE, risk ratio) triple of dicts.
# Per measure: collect the dicts, tabulate them (one row per replicate), then summarize
# with gc.summarize_bootstrap_percentile_ci.

# Raw ATE
raw_ate_results_adl_65_75_dic = [replicate[0] for replicate in bootstrap_results_adl_65_75_dic]
df_raw_ate_adl_65_75_dic = pd.DataFrame(raw_ate_results_adl_65_75_dic)
df_raw_ate_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(df_raw_ate_adl_65_75_dic)

# Standardized ATE
std_ate_results_adl_65_75_dic = [replicate[1] for replicate in bootstrap_results_adl_65_75_dic]
df_std_ate_adl_65_75_dic = pd.DataFrame(std_ate_results_adl_65_75_dic)
df_std_ate_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(df_std_ate_adl_65_75_dic)

# Risk ratio (summarized with risk_ratio=True)
risk_ratio_results_adl_65_75_dic = [replicate[2] for replicate in bootstrap_results_adl_65_75_dic]
df_risk_ratio_adl_65_75_dic = pd.DataFrame(risk_ratio_results_adl_65_75_dic)
df_risk_ratio_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_risk_ratio_adl_65_75_dic, risk_ratio=True
)

# Save the per-replicate tables first ...
gc.save_results_df(df_raw_ate_adl_65_75_dic, prefix="raw_ate_df", label="adl 65–75")
gc.save_results_df(df_std_ate_adl_65_75_dic, prefix="std_ate_df", label="adl 65–75")
gc.save_results_df(df_risk_ratio_adl_65_75_dic, prefix="risk_ratio_df", label="adl 65–75")

# ... then the summaries
gc.save_results_df(df_raw_ate_summary_adl_65_75_dic, prefix="raw_ate_summary", label="adl 65–75")
gc.save_results_df(df_std_ate_summary_adl_65_75_dic, prefix="std_ate_summary", label="adl 65–75")
gc.save_results_df(df_risk_ratio_summary_adl_65_75_dic, prefix="risk_ratio_summary", label="adl 65–75")

Saved: Results\raw_ate_df_adl_65-75_all.csv
Saved: Results\std_ate_df_adl_65-75_all.csv
Saved: Results\risk_ratio_df_adl_65-75_all.csv
Saved: Results\raw_ate_summary_adl_65-75_all.csv
Saved: Results\std_ate_summary_adl_65-75_all.csv
Saved: Results\risk_ratio_summary_adl_65-75_all.csv


In [67]:
# Fixed models
# Separate outcome models
#
# Runs the bootstrap replicates of
# run_mc_bootstrap_once_regime_fixed_model_no_cohabit_no_child in parallel and
# reports the elapsed time.

from joblib import Parallel, delayed
import pandas as pd
import time

# perf_counter is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the run.
start = time.perf_counter()

# Define number of bootstrap replicates and parallel jobs
n_bootstrap = 1000  # or any number you like
n_jobs = 8
batch_size = 15  # tasks dispatched to a worker at once

# Run bootstrap in parallel: one task per seed, seeds 42 .. 42 + n_bootstrap - 1
bootstrap_results_no_cohabit_no_child_adl_65_75_dic = Parallel(n_jobs=n_jobs, batch_size=batch_size)(
    delayed(run_mc_bootstrap_once_regime_fixed_model_no_cohabit_no_child)(seed)
    for seed in range(42, 42 + n_bootstrap)
)

end = time.perf_counter()
# Report the configured replicate count rather than a hard-coded number
print(f"Finished {n_bootstrap} bootstraps in {(end - start)/60:.2f} minutes")

# Convert to DataFrame (one row per replicate; each replicate is a 3-tuple of result dicts)
df_bootstrap_results_no_cohabit_no_child_adl_65_75_dic = pd.DataFrame(bootstrap_results_no_cohabit_no_child_adl_65_75_dic)


Finished 1000 bootstraps in 6.05 minutes


In [68]:
# Each bootstrap replicate is a (raw ATE, standardized ATE, risk ratio) triple of dicts.
# Per measure: collect the dicts, tabulate them (one row per replicate), then summarize
# with gc.summarize_bootstrap_percentile_ci.

# Raw ATE
raw_ate_results_no_cohabit_no_child_adl_65_75_dic = [
    replicate[0] for replicate in bootstrap_results_no_cohabit_no_child_adl_65_75_dic
]
df_raw_ate_no_cohabit_no_child_adl_65_75_dic = pd.DataFrame(raw_ate_results_no_cohabit_no_child_adl_65_75_dic)
df_raw_ate_no_cohabit_no_child_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_raw_ate_no_cohabit_no_child_adl_65_75_dic
)

# Standardized ATE
std_ate_results_no_cohabit_no_child_adl_65_75_dic = [
    replicate[1] for replicate in bootstrap_results_no_cohabit_no_child_adl_65_75_dic
]
df_std_ate_no_cohabit_no_child_adl_65_75_dic = pd.DataFrame(std_ate_results_no_cohabit_no_child_adl_65_75_dic)
df_std_ate_no_cohabit_no_child_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_std_ate_no_cohabit_no_child_adl_65_75_dic
)

# Risk ratio (summarized with risk_ratio=True)
risk_ratio_results_no_cohabit_no_child_adl_65_75_dic = [
    replicate[2] for replicate in bootstrap_results_no_cohabit_no_child_adl_65_75_dic
]
df_risk_ratio_no_cohabit_no_child_adl_65_75_dic = pd.DataFrame(risk_ratio_results_no_cohabit_no_child_adl_65_75_dic)
df_risk_ratio_no_cohabit_no_child_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_risk_ratio_no_cohabit_no_child_adl_65_75_dic, risk_ratio=True
)

# Save the per-replicate tables first ...
gc.save_results_df(df_raw_ate_no_cohabit_no_child_adl_65_75_dic, prefix="raw_ate_no_cohabit_no_child_df", label="adl 65–75")
gc.save_results_df(df_std_ate_no_cohabit_no_child_adl_65_75_dic, prefix="std_ate_no_cohabit_no_child_df", label="adl 65–75")
gc.save_results_df(df_risk_ratio_no_cohabit_no_child_adl_65_75_dic, prefix="risk_ratio_no_cohabit_no_child_df", label="adl 65–75")

# ... then the summaries
gc.save_results_df(df_raw_ate_no_cohabit_no_child_summary_adl_65_75_dic, prefix="raw_ate_summary_no_cohabit_no_child", label="adl 65–75")
gc.save_results_df(df_std_ate_no_cohabit_no_child_summary_adl_65_75_dic, prefix="std_ate_summary_no_cohabit_no_child", label="adl 65–75")
gc.save_results_df(df_risk_ratio_no_cohabit_no_child_summary_adl_65_75_dic, prefix="risk_ratio_summary_no_cohabit_no_child", label="adl 65–75")

Saved: Results\raw_ate_no_cohabit_no_child_df_adl_65-75_all.csv
Saved: Results\std_ate_no_cohabit_no_child_df_adl_65-75_all.csv
Saved: Results\risk_ratio_no_cohabit_no_child_df_adl_65-75_all.csv
Saved: Results\raw_ate_summary_no_cohabit_no_child_adl_65-75_all.csv
Saved: Results\std_ate_summary_no_cohabit_no_child_adl_65-75_all.csv
Saved: Results\risk_ratio_summary_no_cohabit_no_child_adl_65-75_all.csv


In [72]:
# Fixed models
# Separate outcome models
#
# Runs the bootstrap replicates of
# run_mc_bootstrap_once_regime_fixed_model_no_cohabit_with_child in parallel and
# reports the elapsed time.

from joblib import Parallel, delayed
import pandas as pd
import time

# perf_counter is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during the run.
start = time.perf_counter()

# Define number of bootstrap replicates and parallel jobs
n_bootstrap = 1000  # or any number you like
n_jobs = 8
batch_size = 15  # tasks dispatched to a worker at once

# Run bootstrap in parallel: one task per seed, seeds 42 .. 42 + n_bootstrap - 1
bootstrap_results_no_cohabit_with_child_adl_65_75_dic = Parallel(n_jobs=n_jobs, batch_size=batch_size)(
    delayed(run_mc_bootstrap_once_regime_fixed_model_no_cohabit_with_child)(seed)
    for seed in range(42, 42 + n_bootstrap)
)

end = time.perf_counter()
# Report the configured replicate count rather than a hard-coded number
print(f"Finished {n_bootstrap} bootstraps in {(end - start)/60:.2f} minutes")

# Convert to DataFrame (one row per replicate; each replicate is a 3-tuple of result dicts)
df_bootstrap_results_no_cohabit_with_child_adl_65_75_dic = pd.DataFrame(bootstrap_results_no_cohabit_with_child_adl_65_75_dic)


Finished 1000 bootstraps in 6.08 minutes


In [74]:
# Split every bootstrap replicate into its three components:
#   position 0 -> raw ATE, position 1 -> standardized ATE, position 2 -> risk ratio
raw_ate_results_no_cohabit_with_child_adl_65_75_dic = [
    replicate[0] for replicate in bootstrap_results_no_cohabit_with_child_adl_65_75_dic
]
std_ate_results_no_cohabit_with_child_adl_65_75_dic = [
    replicate[1] for replicate in bootstrap_results_no_cohabit_with_child_adl_65_75_dic
]
risk_ratio_results_no_cohabit_with_child_adl_65_75_dic = [
    replicate[2] for replicate in bootstrap_results_no_cohabit_with_child_adl_65_75_dic
]

# Per-replicate tables
df_raw_ate_no_cohabit_with_child_adl_65_75_dic = pd.DataFrame(
    raw_ate_results_no_cohabit_with_child_adl_65_75_dic
)
df_std_ate_no_cohabit_with_child_adl_65_75_dic = pd.DataFrame(
    std_ate_results_no_cohabit_with_child_adl_65_75_dic
)
df_risk_ratio_no_cohabit_with_child_adl_65_75_dic = pd.DataFrame(
    risk_ratio_results_no_cohabit_with_child_adl_65_75_dic
)

# Summaries from gc.summarize_bootstrap_percentile_ci; the risk-ratio table
# is the only one passed risk_ratio=True.
df_raw_ate_no_cohabit_with_child_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_raw_ate_no_cohabit_with_child_adl_65_75_dic
)
df_std_ate_no_cohabit_with_child_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_std_ate_no_cohabit_with_child_adl_65_75_dic
)
df_risk_ratio_no_cohabit_with_child_summary_adl_65_75_dic = gc.summarize_bootstrap_percentile_ci(
    df_risk_ratio_no_cohabit_with_child_adl_65_75_dic,
    risk_ratio=True,
)

# Save: the three per-replicate tables first, then the three summaries
# (same order as writing the six calls out by hand).
for _frame_to_save, _file_prefix in (
    (df_raw_ate_no_cohabit_with_child_adl_65_75_dic, "raw_ate_no_cohabit_with_child_df"),
    (df_std_ate_no_cohabit_with_child_adl_65_75_dic, "std_ate_no_cohabit_with_child_df"),
    (df_risk_ratio_no_cohabit_with_child_adl_65_75_dic, "risk_ratio_no_cohabit_with_child_df"),
    (df_raw_ate_no_cohabit_with_child_summary_adl_65_75_dic, "raw_ate_summary_no_cohabit_with_child"),
    (df_std_ate_no_cohabit_with_child_summary_adl_65_75_dic, "std_ate_summary_no_cohabit_with_child"),
    (df_risk_ratio_no_cohabit_with_child_summary_adl_65_75_dic, "risk_ratio_summary_no_cohabit_with_child"),
):
    gc.save_results_df(_frame_to_save, prefix=_file_prefix, label="adl 65–75")

Saved: Results\raw_ate_no_cohabit_with_child_df_adl_65-75_all.csv
Saved: Results\std_ate_no_cohabit_with_child_df_adl_65-75_all.csv
Saved: Results\risk_ratio_no_cohabit_with_child_df_adl_65-75_all.csv
Saved: Results\raw_ate_summary_no_cohabit_with_child_adl_65-75_all.csv
Saved: Results\std_ate_summary_no_cohabit_with_child_adl_65-75_all.csv
Saved: Results\risk_ratio_summary_no_cohabit_with_child_adl_65-75_all.csv


In [None]:
# Permutation-importance plot for the Mediterranean-regime ("med") ADL 65-75
# outcome super learner; feature names come from the treatment-feature frame.
gc.plot_super_learner_permutation_importance(
    X=X_med_features_adl_65_75_dic,
    y=y_adl_med_65_75_dic,
    model=y_adl_med_super_learner,
    feature_names=list(trt_features_med_adl_65_75_dic.columns),
    label="adl 65-75 (med)",
    output_dir="model_outputs/adl/med",
)

In [None]:
# Permutation-importance plot for the "cor" ADL 65-75 outcome super learner;
# feature names come from the matching treatment-feature frame.
gc.plot_super_learner_permutation_importance(
    X=X_cor_features_adl_65_75_dic,
    y=y_adl_cor_65_75_dic,
    model=y_adl_cor_super_learner,
    feature_names=list(trt_features_cor_adl_65_75_dic.columns),
    label="adl 65-75 (cor)",
    output_dir="model_outputs/adl/cor",
)

In [None]:
# Permutation-importance plot for the "scan" ADL 65-75 outcome super learner;
# feature names come from the matching treatment-feature frame.
gc.plot_super_learner_permutation_importance(
    X=X_scan_features_adl_65_75_dic,
    y=y_adl_scan_65_75_dic,
    model=y_adl_scan_super_learner,
    feature_names=list(trt_features_scan_adl_65_75_dic.columns),
    label="adl 65-75 (scan)",
    output_dir="model_outputs/adl/scan",
)