In [1]:
import sys
sys.path.append("/u/home/y/yqg36/Mediation/myutils")  
from data_generator import DataGeneratorParam, TrainDataParam, RealDataParam, ExpandedDataParam
from r0_CI import construct_r0_CIs
from true_models import generate_true_models, generate_bias_models
from learner import build_learner
import itertools
import pandas as pd

Note: You have installed the 'manylinux2014' variant of XGBoost. Certain features such as GPU algorithms or federated learning are not available. To use these features, please upgrade to a recent Linux distro with glibc 2.28+, and install the 'manylinux_2_28' variant.


In [2]:
# Problem dimensions: Z in R^Q, X in R^P, Y in R.
Q, P = 100, 100
# Location handed to the experiment runner as `output_directory_uri`.
OUTPUT_DIRECTORY_URI = "/u/scratch/y/yqg36/Mediation/results"

In [3]:
# Ground-truth models returned by the project helper for dimensions (Q, P).
r0, g0, f0 = generate_true_models(Q, P)

# Bundle the dimensions and the true models into the data-generator settings.
DATA_GENERATOR_PARAM = DataGeneratorParam(
    p=P,
    q=Q,
    r0=r0,
    g0=g0,
    f0=f0,
)

# Training-set settings shared by every run.
TRAIN_DATA_PARAM = TrainDataParam(
    n_train=50000,
)

In [4]:
# Candidate learners for r0, keyed by the name that the experiment loop
# forwards as `r0_learner_name`. Every learner is created via `build_learner`.
r0_LEARNERS = {
    "linear": build_learner(model_type='ols'),
    "random_forest": build_learner(
        model_type='rf',
        output_dim=P,
        n_estimators=100,
        # max_depth=10,
        max_features='sqrt',
        n_jobs=-1,
    ),
    "kernel": build_learner(model_type='krr'),
    "xgboost": build_learner(model_type='xgb', output_dim=P),
    # Key encodes the hidden layers (128x128), epochs (1000) and batch size (64).
    "neural_net_128x128_1000_64": build_learner(
        model_type='mlp',
        # Kept consistent with the rf/xgb learners above, which use
        # output_dim=P; the input side is therefore Q-dimensional.
        # NOTE(review): with P == Q either order runs, so a swap here would go
        # unnoticed -- confirm the direction against `build_learner`.
        input_dim=Q,
        output_dim=P,
        hidden_layers=[128, 128],
        epochs=1000,
        batch_size=64,
    ),
}

In [5]:
# Bias functions come from the project helper; they are crossed with the
# bias scales and the `n_real` values below.
nn_bias_1, nn_bias_2 = generate_bias_models(Q, P)
BIAS_FUNCS = [nn_bias_1, nn_bias_2]
BIAS_SCALES = [0, 1, 10]
N_REALS = [100]

# One RealDataParam per (bias function, bias scale, n_real) triple.
REAL_DATA_PARAMS = [
    RealDataParam(bias_func=fn, bias_scale=scale, n_real=count)
    for fn, scale, count in itertools.product(BIAS_FUNCS, BIAS_SCALES, N_REALS)
]

N_EXPANDEDS = [1000, 10000]
R_EXPANDEDS = [0.01, 0.1, 1]

# One ExpandedDataParam per (n_expanded, r_expanded) pair, passed positionally.
EXPANDED_DATA_PARAMS = list(
    itertools.starmap(
        ExpandedDataParam,
        itertools.product(N_EXPANDEDS, R_EXPANDEDS),
    )
)

In [6]:
# Full run grid: every real-data setting x expanded-data setting x
# (learner name, learner) pair.
combinations = [
    *itertools.product(REAL_DATA_PARAMS, EXPANDED_DATA_PARAMS, r0_LEARNERS.items())
]
print(f"There are {len(combinations)} combinations to run.")

There are 180 combinations to run.


In [None]:
# Run `construct_r0_CIs` once per (real-data setting, expanded-data setting,
# learner) combination and collect whatever each call returns.
mp_results = []
for idx, (real_data_param, expanded_data_param, (r0_learner_name, r0_learner)) in enumerate(combinations):
    mp_results.append(
        construct_r0_CIs(
            data_generator_param = DATA_GENERATOR_PARAM,
            train_data_param = TRAIN_DATA_PARAM,
            # Pass this iteration's settings. Indexing the parameter lists
            # with [0] would rerun the first real/expanded setting for every
            # combination, so only the learner would ever vary.
            real_data_param = real_data_param,
            expanded_data_param = expanded_data_param,
            output_directory_uri = OUTPUT_DIRECTORY_URI,
            r0_learner_name = r0_learner_name,
            r0_learner = r0_learner,
        )
    )
    # Progress counter (1-based) so long runs can be monitored.
    print(f"{idx + 1} / {len(combinations)} finished.")

'train_model' executed in 0.003914s
Coverage: 1.0
 Average ME: 259.7148796378489

'construct_r0_CIs' executed in 0.008683s
1 / 180 finished.
'train_model' executed in 67.920114s


In [None]:
from IPython.display import Audio, display

# Audible alert: auto-plays the MP3 below when this cell is executed, which
# signals that the preceding cells have finished.
display(
    Audio(
        "/u/home/y/yqg36/alarm.mp3",
        autoplay=True,
    )
)