In [1]:
import numpy as np
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import root_mean_squared_error
from data_generation import get_data, g_0, m_0
from dml_algorithm import dml_ate

In [21]:
# XGBoost
import pickle
with open('results_dict.pkl', 'rb') as pickle_file:
    results_dict_ = pickle.load(pickle_file)

results_dict = {}

for N, results in results_dict_.items():
    rmses = results[-1]
    results_dict[N] = np.mean(rmses, axis=0)

print(results_dict)

{250: array([1.56527874, 1.91652426, 0.18219812]), 500: array([1.21668199, 1.46546163, 0.15362898]), 1000: array([0.94717333, 1.1181108 , 0.12635505]), 2000: array([0.75846577, 0.87028295, 0.10306294]), 4000: array([0.61785315, 0.68996007, 0.0835763 ]), 8000: array([0.51142757, 0.56260442, 0.06938977]), 16000: array([0.43729764, 0.47732874, 0.05567659])}


In [22]:
# ElasticNet
import pickle
with open('results_dict.pkl', 'rb') as pickle_file:
    results_dict_ = pickle.load(pickle_file)

results_dict = {}

for N, results in results_dict_.items():
    rmses = results[-1]
    results_dict[N] = np.mean(rmses, axis=0)

print(results_dict)

{250: array([0.81783588, 0.91132411, 0.15982491]), 500: array([0.6223609 , 0.67033547, 0.12340524]), 1000: array([0.51418182, 0.54255521, 0.09347865]), 2000: array([0.4562664 , 0.47172106, 0.06874686]), 4000: array([0.42061945, 0.43096792, 0.04985691]), 8000: array([0.40363964, 0.41099026, 0.03545416]), 16000: array([0.39323008, 0.399441  , 0.02546802])}


In [2]:
rng = np.random.default_rng(seed=42)

In [5]:
N = 1000
y_data, d_data, x_data = get_data(N, rng)
scaler = StandardScaler()
x_data_stand = scaler.fit_transform(x_data)
y_train, y_test, d_train, d_test, x_train, x_test, x_train_stand, x_test_stand = train_test_split(y_data, d_data, x_data, x_data_stand, test_size=0.2, random_state=42)

In [6]:
model_g = MLPRegressor(hidden_layer_sizes=(32,32,16), alpha=0.25, batch_size=1, max_iter=100, random_state=42, validation_fraction=0)
model_m = MLPClassifier(hidden_layer_sizes=(32,32,16), alpha=0.1, batch_size=2, max_iter=50, random_state=42, validation_fraction=0)

In [7]:
%%time
for i in range(50):
    model_g.partial_fit(x_train_stand[d_train==1], y_train[d_train==1])
    print('Iteration', i, root_mean_squared_error(g_0(1, x_test), model_g.predict(x_test_stand)))

Iteration 0 2.9254583394379505
Iteration 1 2.3869640761324757
Iteration 2 1.7196647291653158
Iteration 3 1.3667411197900294
Iteration 4 1.2052914410778284
Iteration 5 1.1280620109845532
Iteration 6 1.076609041426548
Iteration 7 1.038368133762261
Iteration 8 1.0132592316712967
Iteration 9 0.9875319064848911
Iteration 10 0.9668545264781349
Iteration 11 0.9447033586055306
Iteration 12 0.9337537718956059
Iteration 13 0.9141386030607617
Iteration 14 0.8957815774224622
Iteration 15 0.8794908360400732
Iteration 16 0.8675088301282665
Iteration 17 0.8566949762502983
Iteration 18 0.8474834912445466
Iteration 19 0.8410502711605106
Iteration 20 0.8343856147545885
Iteration 21 0.8301826680034307
Iteration 22 0.8253348296624003
Iteration 23 0.8241137206311427
Iteration 24 0.8220605488225553
Iteration 25 0.8151792083585391
Iteration 26 0.8142505112511759
Iteration 27 0.8126659369033976
Iteration 28 0.8106460386574582
Iteration 29 0.8096159763618671
Iteration 30 0.8072762289746623
Iteration 31 0.80793

In [8]:
model_g.fit(x_train_stand[d_train==1], y_train[d_train==1])

In [9]:
root_mean_squared_error(g_0(1, x_test), model_g.predict(x_test_stand))

0.5986252449832048

In [59]:
%%time
model_m.fit(x_train_stand, d_train)

Iteration 1, loss = 1.49007839
Iteration 2, loss = 0.83796842
Iteration 3, loss = 0.73261576
Iteration 4, loss = 0.70399826
Iteration 5, loss = 0.69276004
Iteration 6, loss = 0.68778633
Iteration 7, loss = 0.68518454
Iteration 8, loss = 0.68207997
Iteration 9, loss = 0.68305979
Iteration 10, loss = 0.68121131
Iteration 11, loss = 0.68145855
Iteration 12, loss = 0.68163973
Iteration 13, loss = 0.67733565
Iteration 14, loss = 0.67911415
Iteration 15, loss = 0.67633630
Iteration 16, loss = 0.67451005
Iteration 17, loss = 0.67885545
Iteration 18, loss = 0.67843708
Iteration 19, loss = 0.67485154
Iteration 20, loss = 0.67367714
Iteration 21, loss = 0.67455933
Iteration 22, loss = 0.67351553
Iteration 23, loss = 0.67740725
Iteration 24, loss = 0.67541130
Iteration 25, loss = 0.67509002
Iteration 26, loss = 0.67408275
Iteration 27, loss = 0.67462421
Iteration 28, loss = 0.67749028
Iteration 29, loss = 0.67355785
Iteration 30, loss = 0.67284423
Iteration 31, loss = 0.67143218
Iteration 32, los

In [60]:
root_mean_squared_error(m_0(x_test), model_m.predict_proba(x_test_stand)[:,1])

0.14982370553476335

In [11]:
from sklearn.metrics import mean_squared_error

In [40]:
%%time
model_m.fit(x_train, d_train)
print(mean_squared_error(model_m.predict_proba(x_test)[:,1], m_0(x_test)))
svm_model_m.fit(x_train, d_train)
print(mean_squared_error(svm_model_m.predict_proba(x_test)[:,1], m_0(x_test)))
svm_model_m.fit(x_stand_train, d_train)
print(mean_squared_error(svm_model_m.predict_proba(x_stand_test)[:,1], m_0(x_test)))

0.026702033623244567
0.037738635375602556
0.045538710681002785
CPU times: total: 141 ms
Wall time: 136 ms


In [35]:
%%time
model_g.fit(x_train[d_train==0], y_train[d_train==0])
print(mean_squared_error(model_g.predict(x_test), g_0(0, x_test)))
svm_model_g.fit(x_train[d_train==0], y_train[d_train==0])
print(mean_squared_error(svm_model_g.predict(x_test), g_0(0, x_test)))
svm_model_g.fit(x_stand_train[d_train==0], y_train[d_train==0])
print(mean_squared_error(svm_model_g.predict(x_stand_test), g_0(0, x_test)))

1.0763928763707538
0.845974705252724
0.9726778061261214
CPU times: total: 15.6 ms
Wall time: 23.8 ms


In [36]:
%%time
model_g.fit(x_train[d_train==1], y_train[d_train==1])
print(mean_squared_error(model_g.predict(x_test), g_0(1, x_test)))
svm_model_g.fit(x_train[d_train==1], y_train[d_train==1])
print(mean_squared_error(svm_model_g.predict(x_test), g_0(1, x_test)))
svm_model_g.fit(x_stand_train[d_train==1], y_train[d_train==1])
print(mean_squared_error(svm_model_g.predict(x_stand_test), g_0(1, x_test)))

2.091994726884207
1.9550870294240825
2.1874437665098676
CPU times: total: 31.2 ms
Wall time: 18 ms


In [5]:
def svm_cv(y_data, d_data, x_data, cv=5):
    model_g = SVR()
    #model_m = CalibratedClassifierCV(estimator=LinearSVC(dual='auto', max_iter=5000, random_state=42))
    model_m = SVC(probability=True, random_state=42)
    
    param_grid = {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
    }

    grid_search_g = GridSearchCV(estimator=model_g, param_grid=param_grid, cv=cv, n_jobs=-1,
                                 scoring='neg_mean_squared_error')
    grid_search_m = GridSearchCV(estimator=model_m, param_grid=param_grid, cv=cv, n_jobs=-1,
                                 scoring='neg_brier_score')

    svm_params_dict = {}
    for d in [0, 1]:
        grid_search_g.fit(X=x_data[d_data == d], y=y_data[d_data == d])
        svm_params_dict[f'g{d}'] = grid_search_g.best_params_
    grid_search_m.fit(X=x_data, y=d_data)
    svm_params_dict['m'] = grid_search_m.best_params_

    return svm_params_dict

In [12]:
%%time
svm_params_dict = svm_cv(y_data, d_data, x_data)
svm_params_dict

KeyboardInterrupt: 

In [28]:
model_g0, model_g1 = LinearSVR(dual='auto'), LinearSVR(dual='auto')
model_g0_nl, model_g1_nl = SVR(), SVR()
model_m = CalibratedClassifierCV(estimator=LinearSVC(dual='auto')) #, n_jobs=-1
model_m_nl = SVC(probability=True, random_state=42)

In [52]:
N = 250
y_data, d_data, x_data = get_data(N, rng)
y_train, y_test, d_train, d_test, x_train, x_test = train_test_split(y_data, d_data, x_data, test_size=0.2, random_state=42)
model_m = CalibratedClassifierCV(estimator=LinearSVC(dual='auto', max_iter=2500))
model_m_2 = CalibratedClassifierCV(estimator=LinearSVC(dual='auto', max_iter=2500), method='isotonic')

In [53]:
%%time
model_m.fit(x_train, d_train)
print(model_m.predict_proba(x_test)[:20,1])
model_m_2.fit(x_train, d_train)
print(model_m_2.predict_proba(x_test)[:20,1])
print(m_0(x_test[:20]))

[0.75782366 0.1081392  0.4380588  0.13226565 0.22477492 0.32252746
 0.52142833 0.55220563 0.3364116  0.61823012 0.76432348 0.29846205
 0.46542094 0.15033277 0.65025305 0.4393908  0.85164284 0.57002081
 0.63447986 0.04004093]
[0.93333333 0.         0.4512605  0.         0.09203475 0.30238095
 0.4512605  0.4912605  0.33952381 0.67239993 0.93333333 0.27507003
 0.4912605  0.05714286 0.72466527 0.4512605  0.96012949 0.54226408
 0.62380952 0.        ]
[0.8485524  0.09058125 0.30348166 0.02835181 0.13868599 0.39836533
 0.46815584 0.49492956 0.23402002 0.82336364 0.70270092 0.35112246
 0.48414538 0.03198229 0.78706381 0.32661086 0.83445733 0.65141855
 0.6412689  0.00732812]
CPU times: total: 109 ms
Wall time: 115 ms


In [None]:
C, max_iter

In [None]:
param_grid = {
    'C': [0.1, 1, 10, 100]
}

In [255]:
%%time
N = 16000
y_data, d_data, x_data = get_data(N, rng)
model_g0, model_g1 = LinearSVR(C=5, dual='auto', max_iter=5000, random_state=42), LinearSVR(C=5, dual='auto', max_iter=5000, random_state=42)
model_g = [model_g0, model_g1]
model_m = CalibratedClassifierCV(estimator=LinearSVC(C=5, dual='auto', max_iter=5000, random_state=42)) #, n_jobs=-1
dml_ate(y_data, d_data, x_data, model_g, model_m, K=5, classical=True, inference=True, alpha=0.05)

CPU times: total: 3.02 s
Wall time: 3 s


(array([0.51908536, 0.55552273, 0.45258778]),
 5.451014995427961,
 array([0.43462256, 0.60354816]))

In [46]:
%%time
model_g0, model_g1 = LinearSVR(dual='auto', max_iter=2500, random_state=42), LinearSVR(dual='auto', max_iter=2500, random_state=42)
model_g = [model_g0, model_g1]
model_m = SVC(probability=True, random_state=42)
dml_ate(y_data, d_data, x_data, model_g, model_m, K=5, classical=True, inference=True, alpha=0.05)

CPU times: total: 984 ms
Wall time: 986 ms


(array([0.5423967 , 0.64183653, 0.10269771]),
 4.224612415082437,
 array([0.28055733, 0.80423608]))

In [198]:
for N in [250, 500, 1000]:#, 2000, 4000, 8000, 16000]:
    y_data, d_data, x_data = get_data(N, rng)
    svm_params_dict = svm_cv(y_data, d_data, x_data)
    print(svm_params_dict)

{'g0': {'C': 10}, 'g1': {'C': 5}, 'm': {'C': 10}}
{'g0': {'C': 5}, 'g1': {'C': 3}, 'm': {'C': 10}}




{'g0': {'C': 10}, 'g1': {'C': 10}, 'm': {'C': 1}}


In [None]:
choose C=5!