## FLAML for hp optimisation and model selection
We use FLAML twice: first to find the best component model for each estimator, and then to optimise the estimators themselves and choose the best one. This notebook shows how it's done.

In [2]:
%load_ext autoreload
%autoreload 2
import os, sys
import warnings
warnings.filterwarnings('ignore') # suppress sklearn deprecation warnings for now.. 

# the below checks for whether we run dowhy and auto-causality from source
root_path = root_path = os.path.realpath('../..')
try: 
    import auto_causality
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "auto-causality"))
    
try:
    import dowhy
except ModuleNotFoundError:
    sys.path.append(os.path.join(root_path, "dowhy"))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from auto_causality import AutoCausality
from auto_causality.datasets import synth_ihdp, preprocess_dataset
from auto_causality.scoring import ate

### Model fitting & scoring
Here we fit a (selection of) model(s) to the data and score them with the ERUPT metric on held-out data.

In [20]:
# Load the synthetic IHDP dataset and split its columns into roles.
data_df, features_X, features_W, targets, treatment = preprocess_dataset(synth_ihdp())
outcome = targets[0]  # the first target column is used as the outcome

# Estimators to search over.
# NOTE: "TransformedOutcome" doesn't work?
estimator_list = ["LinearDML", "SLearner", "ForestDRLearner"]

# Search settings; "erupt" is the metric being optimised.
ac_settings = dict(
    time_budget=3,
    estimator_list=estimator_list,
    metric="erupt",
    verbose=3,
    components_verbose=2,
    components_time_budget=30,
    use_ray=False,
)
ac = AutoCausality(**ac_settings)

# Run the search on the prepared data.
myresults = ac.fit(data_df, treatment, outcome, features_W, features_X)

# Report the winning estimator, its config and its score.
for label, value in (
    ("Best estimator", ac.best_estimator),
    ("best config", ac.best_config),
    ("best score", ac.best_score),
):
    print(f"{label}: {value}")


[flaml.tune.tune: 03-15 17:47:58] {447} INFO - trial 1 config: {'fit_cate_intercept': 1, 'mc_iters': 0}
[flaml.tune.tune: 03-15 17:48:02] {108} INFO - result: {'erupt': 6.32789671421051, 'qini': -0.016454312144289912, 'auc': 0.5271073490579692, 'ate': 3.789311297861625, 'r_score': -0.02264565589557921, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x0000026A41B0B880>, 'scores': {'estimator_name': 'backdoor.econml.dml.LinearDML', 'train': {'erupt': 6.454533366022752, 'qini': 0.033705648485451385, 'auc': 0.5422723983356651, 'r_score': 0.0493278383914737, 'ate': 3.8136580757045535, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x0000026A41F76160>, 'values':      treated  y_factual         p  policy   weights
0        0.0   2.847216  0.177554    True  5.575301
1        0.0   3.372611  0.177554    True       NaN
2        0.0   1.395117  0.177554    True  0.000000
3        1.0   6.871173  0.177554    True  0.000000
4        0.0   0.785504

... Estimator: backdoor.econml.dml.LinearDML
 erupt (validation): 6.327897
 erupt (test): 6.616496
 qini (validation): -0.016454
 qini (test): -0.067953
 auc (validation): 0.527107
 auc (test): 0.487228
 ate (validation): 3.789311
 ate (test): 3.947585
 r_score (validation): -0.022646
 r_score (test): -0.025592


[flaml.tune.tune: 03-15 17:48:11] {108} INFO - result: {'erupt': 6.326161731373182, 'qini': 0.003207553759510602, 'auc': 0.5391303829066524, 'ate': 3.8749767562974338, 'r_score': 0.006958191290367566, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x0000026A41E35670>, 'scores': {'estimator_name': 'backdoor.econml.dml.SparseLinearDML', 'train': {'erupt': 6.4649506977711155, 'qini': 0.060994886864560546, 'auc': 0.5449125990642756, 'r_score': 0.03332082210973497, 'ate': 3.991167940353232, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x0000026A41C764C0>, 'values':      treated  y_factual         p  policy   weights
0        0.0   2.847216  0.177554    True  5.620628
1        0.0   3.372611  0.177554    True       NaN
2        0.0   1.395117  0.177554    True  0.000000
3        1.0   6.871173  0.177554    True  0.000000
4        0.0   0.785504  0.177554    True  0.000000
..       ...        ...       ...     ...       ...
592      1.0   

... Estimator: backdoor.econml.dml.SparseLinearDML
 erupt (validation): 6.326162
 erupt (test): 6.616496
 qini (validation): 0.003208
 qini (test): -0.081495
 auc (validation): 0.539130
 auc (test): 0.485292
 ate (validation): 3.874977
 ate (test): 4.078688
 r_score (validation): 0.006958
 r_score (test): -0.016833
config: {'overall_model': AutoML(append_log=False, auto_augment=True, early_stop=False, ensemble=False,
       estimator_list='auto', eval_method='auto', hpo_method='auto',
       keep_search_state=False, learner_selector='sample', log_file_name='',
       log_training_metric=False, log_type='better', max_iter=1000000,
       mem_thres=4294967296, metric='auto', min_sample_size=10000,
       model_history=False, n_concurrent_trials=1, n_jobs=-1, n_splits=5,
       pred_time_limit=1e-05, retrain_full=True, sample=True, split_ratio=0.1,
       split_type='auto', starting_points={}, task='regression', time_budget=2,
       train_time_limit=inf, use_ray=False, ...)}


[flaml.tune.tune: 03-15 17:48:13] {447} INFO - trial 1 config: {'min_propensity': 3.070155442191211e-06, 'mc_iters': 1, 'n_estimators': 324, 'max_depth': 2, 'min_samples_split': 18, 'min_samples_leaf': 7, 'min_weight_fraction_leaf': 0.377743449018298, 'max_features': 'sqrt', 'min_impurity_decrease': 2.6639242043080236, 'max_samples': 0.378105341302808, 'min_balancedness_tol': 0.006805783323944936, 'honest': 1, 'subforest_size': 7}


... Estimator: backdoor.econml.metalearners.SLearner
 erupt (validation): 6.368044
 erupt (test): 6.620952
 qini (validation): 0.023626
 qini (test): -0.072504
 auc (validation): 0.576055
 auc (test): 0.541810
 ate (validation): 3.960894
 ate (test): 4.018638
 r_score (validation): 0.081523
 r_score (test): 0.048389


[flaml.tune.tune: 03-15 17:48:18] {108} INFO - result: {'erupt': 6.326161731373182, 'qini': -0.09313027300538203, 'auc': 0.4618810041675895, 'ate': 4.073847574256198, 'r_score': -0.030115627581726256, 'estimator': <dowhy.causal_estimator.CausalEstimate object at 0x0000026A45403FD0>, 'scores': {'estimator_name': 'backdoor.econml.dr.ForestDRLearner', 'train': {'erupt': 6.465497504990055, 'qini': -0.0028583504389217084, 'auc': 0.501461675769016, 'r_score': -0.005822761034007362, 'ate': 4.073847574256195, 'intrp': <econml.cate_interpreter._interpreters.SingleTreeCateInterpreter object at 0x0000026A450422B0>, 'values':      treated  y_factual         p  policy   weights
0        0.0   2.847216  0.177554    True  5.632075
1        0.0   3.372611  0.177554    True       NaN
2        0.0   1.395117  0.177554    True  0.000000
3        1.0   6.871173  0.177554    True  0.000000
4        0.0   0.785504  0.177554    True  0.000000
..       ...        ...       ...     ...       ...
592      1.0  

... Estimator: backdoor.econml.dr.ForestDRLearner
 erupt (validation): 6.326162
 erupt (test): 6.616496
 qini (validation): -0.093130
 qini (test): -0.009461
 auc (validation): 0.461881
 auc (test): 0.494263
 ate (validation): 4.073848
 ate (test): 4.073848
 r_score (validation): -0.030116
 r_score (test): -0.006540
Best estimator: backdoor.econml.metalearners.SLearner
best config: {}
best score: 6.368043645320948


In [None]:
# Display `ac.train_df` as a quick sanity check
# (presumably the training split created during `ac.fit` — TODO confirm).
ac.train_df

In [None]:
outcome = targets[0]

# Add a "baseline" entry to the score table: for each split, the mean observed
# outcome stands in for ERUPT and the first element returned by `ate` for the ATE.
# NOTE(review): the "validation" entry is computed from `ac.test_df` — confirm
# this matches how the estimators' own "validation" scores are produced.
baseline_scores = {"estimator": "baseline", "outcome": outcome}
for split_name, split_df in (("train", ac.train_df), ("validation", ac.test_df)):
    baseline_scores[split_name] = {
        "erupt": split_df[outcome].mean(),
        "ate": ate(split_df[treatment], split_df[outcome])[0],
    }
ac.full_scores["baseline"] = baseline_scores

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

colors = ([matplotlib.colors.CSS4_COLORS['black']] +
    list(matplotlib.colors.TABLEAU_COLORS) + [
    matplotlib.colors.CSS4_COLORS['lime'],
    matplotlib.colors.CSS4_COLORS['yellow'],
    matplotlib.colors.CSS4_COLORS['pink']
])

v = ac.full_scores
plt.figure(figsize = (7,5))
plt.title(outcome)
for (est, scr),col in zip(v.items(),colors):
    sc = [scr['train']['erupt'], scr['validation']['erupt']]
    crv = [scr['train']['ate'], scr['validation']['ate']]
    plt.plot(sc, crv, color=col, marker="o")
    plt.scatter(sc[1:],crv[1:], c=col, s=120 )
    plt.grid()
    plt.xlabel("ERUPT score")
    plt.ylabel("ATE")
    plt.legend(v.keys(),bbox_to_anchor=(1.04,1), borderaxespad=0)

plt.grid()
plt.show()


In [None]:
# Plot the CATE interpreter stored with the best estimator's validation scores.
scr = ac.full_scores[ac.best_estimator]
intrp = scr['validation']['intrp']
plt.figure(figsize=(15, 7))
try:
    feature_names = intrp.feature_names
except AttributeError:
    # Interpreter carries no feature names: fall back to X followed by the
    # W features that are not already in X.
    feature_names = features_X + [w for w in features_W if w not in features_X]
# Pass the resolved names; reading `intrp.feature_names` again here would raise
# in exactly the case the fallback above is meant to handle.
intrp.plot(feature_names=feature_names, fontsize=10)
plt.title(f"{ac.best_estimator}_{outcome}")
plt.show()


In [None]:
# SHAP summary plot of feature importances for the best estimator.
import matplotlib.pyplot as plt
import shap

from auto_causality.shap import shap_values

# Shapley value calculation can be slow, so subsample: at most 100 rows (never
# more than the split contains), with a fixed seed so the plot is reproducible.
this_df = ac.test_df.sample(n=min(100, len(ac.test_df)), random_state=0)

print(outcome, ac.best_estimator)
est = ac.estimates[ac.best_estimator]
shaps = shap_values(est, this_df)

plt.title(outcome + '_' + ac.best_estimator.split('.')[-1])
# Restrict the displayed columns to the estimator's effect modifiers.
shap.summary_plot(shaps, this_df[est.estimator._effect_modifier_names])
plt.show()
