In [1]:
import sys

sys.path.append("./myutils")
from data_generator import (
    DataGeneratorParam,
    TrainDataParam,
    RealDataParam,
    ExpandedDataParam,
    DataGenerator,
)
from train_rhat import train_rhat
from r0_CI import construct_r0_CIs
from true_models import generate_true_models, generate_bias_models
from learner import build_learner
from utils import get_model_directory_uri
import itertools
import numpy as np
import pandas as pd
from multiprocessing.dummy import Pool as ThreadPool
from tqdm import tqdm

In [2]:
Q = 100  # dimension of Z, i.e. Z in R^Q
P = 100  # dimension of X, i.e. X in R^P
# Y is scalar (Y in R), so it has no dimension constant.
# Root output location; handed to get_model_directory_uri as output_directory_uri.
OUTPUT_DIRECTORY_URI = "./results"

In [3]:
# Ground-truth models for this experiment, generated for dimensions (Q, P).
# NOTE(review): no random seed is set in the visible cells; if
# generate_true_models is stochastic, reruns will not reproduce — confirm.
r0, g0, f0 = generate_true_models(Q, P)
DATA_GENERATOR_PARAM = DataGeneratorParam(p=P, q=Q, r0=r0, g0=g0, f0=f0)

# Training-data settings; forwarded to get_model_directory_uri and train_rhat.
# Presumably n_train is the number of training samples — confirm in TrainDataParam.
TRAIN_DATA_PARAM = TrainDataParam(n_train=50000)

In [4]:
# Candidate learners, keyed by the name that run_single_experiment forwards to
# get_model_directory_uri (r0_learner_name) and train_rhat (learner_name).
r0_LEARNERS = {
    # "linear": build_learner(model_type='ols'),
    "random_forest": build_learner(
        model_type="rf",
        output_dim=P,
        n_estimators=100,
        #         max_depth=100,
        max_features="sqrt",
        n_jobs=-1,
    ),
    "kernel": build_learner(model_type="krr"),
    "xgboost": build_learner(model_type="xgb", output_dim=P),
    # Key encodes the settings below: hidden layers 128x128, 1000 epochs, batch 64.
    # NOTE(review): this entry passes input_dim=P / output_dim=Q, whereas the
    # random_forest and xgboost entries pass output_dim=P. This makes no
    # difference while P == Q, but confirm which dimension is the output
    # before P and Q are allowed to differ.
    "neural_net_128x128_1000_64": build_learner(
        model_type="mlp",
        input_dim=P,
        output_dim=Q,
        hidden_layers=[128, 128],
        epochs=1000,
        batch_size=64,
    ),
}

In [5]:
# Two bias functions generated for dimensions (Q, P), plus the value grids
# that are swept over in the experiment.
nn_bias_1, nn_bias_2 = generate_bias_models(Q, P)
BIAS_FUNCS = [nn_bias_1, nn_bias_2]
BIAS_SCALES = [0, 1, 10]
N_REALS = [100, 1000]
R_EXPANDEDS = [0.001, 0.01]

# One RealDataParam per (bias function, bias scale, real sample size) triple;
# the last loop varies fastest.
REAL_DATA_PARAMS = [
    RealDataParam(bias_func=func, bias_scale=scale, n_real=size)
    for func in BIAS_FUNCS
    for scale in BIAS_SCALES
    for size in N_REALS
]
print("Number of real data param combos:", len(REAL_DATA_PARAMS))

# One ExpandedDataParam per (real sample size, r_expanded) pair, with
# n_expanded fixed at ten times that sample size.
# NOTE(review): the experiment grid later crosses these with every entry of
# REAL_DATA_PARAMS, so n_expanded is not always 10x the n_real it ends up
# paired with — confirm that this is intended.
EXPANDED_DATA_PARAMS = [
    ExpandedDataParam(n_expanded=10 * size, r_expanded=ratio)
    for size in N_REALS
    for ratio in R_EXPANDEDS
]
print("Number of expanded data param combos:", len(EXPANDED_DATA_PARAMS))

Number of real data param combos: 12
Number of expanded data param combos: 4


# Construct r0_CIs


In [6]:
# Full experiment grid: every (learner name, learner) item crossed with every
# real-data setting and every expanded-data setting. The expanded-data setting
# varies fastest, the learner slowest.
combinations = [
    (learner_item, real_param, expanded_param)
    for learner_item in r0_LEARNERS.items()
    for real_param in REAL_DATA_PARAMS
    for expanded_param in EXPANDED_DATA_PARAMS
]
print(f"There are {len(combinations)} combinations to run.")

There are 192 combinations to run.


In [7]:
def run_single_experiment(
    r0_learner_name,
    r0_learner,
    data_generator_param,
    train_data_param,
    output_directory_uri,
    real_data_param,
    expanded_data_param,
    r0,
    fresh=False,
):
    """Run one experiment and return a flat summary row.

    The function resolves the model directory for this learner, obtains
    ``rhat`` from ``train_rhat``, and passes it to ``construct_r0_CIs``.

    Args:
        r0_learner_name: Name of the learner; used for the model directory,
            passed to ``train_rhat`` and recorded in the result row.
        r0_learner: Learner object passed to ``train_rhat``.
        data_generator_param: Data-generator settings, forwarded to all three
            helper calls.
        train_data_param: Training-data settings, forwarded to
            ``get_model_directory_uri`` and ``train_rhat``.
        output_directory_uri: Root output location used to derive the model
            directory.
        real_data_param: Object exposing ``bias_func``, ``bias_scale`` and
            ``n_real``; forwarded to ``construct_r0_CIs``.
        expanded_data_param: Object exposing ``n_expanded`` and
            ``r_expanded``; forwarded to ``construct_r0_CIs``.
        r0: True model forwarded to ``construct_r0_CIs``.
        fresh: Forwarded unchanged to ``train_rhat`` and ``construct_r0_CIs``.
            Presumably forces recomputation instead of reusing stored
            artifacts — confirm in those helpers.

    Returns:
        dict with keys ``r0_learner_name``, ``bias_func``, ``bias_scale``,
        ``n_real``, ``n_expanded``, ``r_expanded``, ``coverage`` and
        ``avg_me``.
    """
    model_directory_uri = get_model_directory_uri(
        data_generator_param=data_generator_param,
        train_data_param=train_data_param,
        r0_learner_name=r0_learner_name,
        output_directory_uri=output_directory_uri,
    )

    rhat = train_rhat(
        data_generator_param=data_generator_param,
        train_data_param=train_data_param,
        model_directory_uri=model_directory_uri,
        learner_name=r0_learner_name,
        learner=r0_learner,
        fresh=fresh,
    )

    # The intervals themselves are not part of the summary row; only the
    # coverage and avg_me values are kept.
    _, coverage, avg_me = construct_r0_CIs(
        data_generator_param=data_generator_param,
        real_data_param=real_data_param,
        expanded_data_param=expanded_data_param,
        model_directory_uri=model_directory_uri,
        rhat=rhat,
        r0=r0,
        fresh=fresh,
    )

    # Not every callable has __name__ (functools.partial objects and callable
    # instances do not). Fall back to the type name so that a finished
    # experiment is not lost to an AttributeError while building the row.
    bias_func = real_data_param.bias_func
    bias_func_name = getattr(bias_func, "__name__", type(bias_func).__name__)

    return {
        "r0_learner_name": r0_learner_name,
        "bias_func": bias_func_name,
        "bias_scale": real_data_param.bias_scale,
        "n_real": real_data_param.n_real,
        "n_expanded": expanded_data_param.n_expanded,
        "r_expanded": expanded_data_param.r_expanded,
        "coverage": coverage,
        "avg_me": avg_me,
    }

In [8]:
def run_combination(args):
    """Run one entry of ``combinations`` with the notebook-level settings.

    ``args`` is a triple ``((learner_name, learner), real_data_param,
    expanded_data_param)``. This is a single-argument adapter so the function
    can be mapped over the grid by a pool; the remaining arguments of
    ``run_single_experiment`` come from notebook globals and ``fresh`` is
    always ``False``.
    """
    learner_item, real_param, expanded_param = args
    learner_name, learner = learner_item

    # Settings that are identical for every combination in the grid.
    shared_settings = dict(
        data_generator_param=DATA_GENERATOR_PARAM,
        train_data_param=TRAIN_DATA_PARAM,
        output_directory_uri=OUTPUT_DIRECTORY_URI,
        r0=r0,
        fresh=False,
    )
    return run_single_experiment(
        r0_learner_name=learner_name,
        r0_learner=learner,
        real_data_param=real_param,
        expanded_data_param=expanded_param,
        **shared_settings,
    )


# Map run_combination over the whole grid using a thread pool with the
# default worker count. imap_unordered yields rows as they finish, so
# `results` is in completion order, not grid order.
# NOTE(review): each learner object in r0_LEARNERS is shared by many
# combinations, so several threads may pass the same learner and model
# directory to train_rhat at once — confirm that this is safe.
with ThreadPool() as pool:
    pending_results = pool.imap_unordered(run_combination, combinations)
    results = list(tqdm(pending_results, total=len(combinations)))

  0%|          | 0/192 [00:54<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Turn the per-combination summary rows collected by the thread-pool cell
# into one table, print it, and save it as results.csv in the working
# directory. The rows are in `results`; `mp_results` is never defined in this
# notebook, so the previous version of this cell raised NameError.
results = pd.DataFrame(results)
print(results)
results.to_csv("results.csv", index=False)

In [None]:
from pathlib import Path

from IPython.display import Audio, display

# Play a sound when the run finishes. The file lives at a machine-specific
# location, so skip the notification instead of raising when it is not there;
# point ALARM_PATH at any local audio file to enable it elsewhere.
ALARM_PATH = Path("/u/home/y/yqg36/alarm.mp3")
if ALARM_PATH.is_file():
    display(Audio(str(ALARM_PATH), autoplay=True))
else:
    print(f"Alarm sound not found at {ALARM_PATH}; skipping notification.")