In [16]:
from data import load_dataset, get_train_test_split

filename = "dataset/LoanStats3a.csv"
features, data = load_dataset(filename, encoding='utf-8')
print("Data shape: %s" % str(features.shape))

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

Preprocessing...
Feature Engineering...
Data shape: (42535, 40)
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# --- Experiment configuration -------------------------------------------
# These constants are consumed by f() below, which forwards most of them to
# simulate_N_time_periods (seed=, threshold=, num_periods=, num_months=,
# fund_given=, flow=, incoming_loans_per_time_period=).
SEED            = 1
THRESHOLD       = 1.1
NUM_PERIODS     = 10
NUM_MONTHS      = 60
FUND_GIVEN      = 0
FUND_FLOW       = 1e4
LOANS_PER_MONTH = 100
# Number of rows f() adds to the GP training set, one per loop iteration.
NUM_TRAIN_ROWS_BO = 50
# random_state for the split that draws the initial GP training sample.
BO_TRAIN_SEED   = 2

# Data initialization
# First split: 30% of the data is held out as the test set.
X_train, X_test, y_train, y_test = get_train_test_split(features, test_size=0.3, random_state=0)
# Second split rebinds X_train / y_train. With a float train_size=0.2 only 20%
# of the remaining rows stay in the training set; the other 80% become the
# validation set.
# NOTE(review): confirm this 20/80 train/validation ratio is intended.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, train_size=0.2, random_state=0)

# BO GP training data init
# An integer train_size=1 selects exactly ONE row as the initial GP training
# sample; the remainder of this split is discarded, and the selected row also
# stays in X_train / y_train.
# NOTE(review): confirm a single seed row (rather than a fraction) is intended.
X_train_bo, _, y_train_bo, _ = train_test_split(X_train, y_train, train_size=1, random_state=BO_TRAIN_SEED)

# Normalize
# The feature scaler is fitted on the (still unscaled) training rows only and
# then applied to every other feature matrix.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
# From here on X_train and X_val hold SCALED features; X_test stays raw and
# its scaled copy lives in X_test_scaled.
X_train = X_scaler.transform(X_train)
X_val = X_scaler.transform(X_val)
X_test_scaled = X_scaler.transform(X_test)
# NOTE(review): X_train_normal is not defined in this cell or in the cell
# above it. This line only works if the name is left over from earlier kernel
# state, and would raise NameError after Restart & Run All - confirm where it
# is supposed to come from.
X_train_normal = X_scaler.transform(X_train_normal)
X_train_bo = X_scaler.transform(X_train_bo)

# The target scaler is fitted on the training targets only. reshape(-1, 1)
# turns each target vector into a single-column 2-D array for the scaler.
# The unscaled y_* arrays are kept; scaled versions get a *_scaled name.
y_scaler = MinMaxScaler()
y_scaler.fit(y_train.reshape(-1,1))
y_train_scaled = y_scaler.transform(y_train.reshape(-1,1))
y_val_scaled = y_scaler.transform(y_val.reshape(-1,1))
y_test_scaled = y_scaler.transform(y_test.reshape(-1,1))
# NOTE(review): y_train_normal has the same problem as X_train_normal above -
# it is not defined anywhere in the visible cells.
y_train_normal_scaled = y_scaler.transform(y_train_normal.reshape(-1,1))
y_train_bo_scaled = y_scaler.transform(y_train_bo.reshape(-1,1))

import GPy
import numpy as np

from simulation import simulate_N_time_periods

def f(c, kappa):
    """Objective evaluated by the outer Bayesian optimisation.

    Fits a GP (RBF kernel) on the initial sample ``X_train_bo`` /
    ``y_train_bo_scaled``, then grows its training set by
    ``NUM_TRAIN_ROWS_BO`` rows. In each round the row of the remaining
    candidate pool (initially a copy of ``X_train``) with the highest
    ``mean + kappa * sqrt(var)`` score is moved from the pool into the GP
    training set and the GP hyperparameters are re-optimised.

    The resulting model is then passed to ``simulate_N_time_periods`` on
    ``X_val`` / ``y_val``.

    Parameters
    ----------
    c : float
        Forwarded to the simulation as ``conf_quantile=(c*100, 100)``.
    kappa : float
        Weight of the predictive standard deviation in the acquisition score.

    Returns
    -------
    float
        ``perf[:, :, 0]`` summed over axis 1, averaged, and divided by 1e5.
        (The meaning of the axes is defined by ``simulate_N_time_periods`` -
        presumably one row per simulated period; verify there.)

    Notes
    -----
    The module-level arrays are only read, never rebound: all updates happen
    on local copies.
    """
    # Local working copies: candidate pool and the growing GP training set.
    pool_X = np.copy(X_train)
    chosen_X = np.copy(X_train_bo)
    pool_y = np.copy(y_train_scaled)
    chosen_y = np.copy(y_train_bo_scaled)

    rbf = GPy.kern.RBF(input_dim=X_train.shape[1], variance=1., lengthscale=1.)
    surrogate = GPy.models.GPRegression(X_train_bo, y_train_bo_scaled, rbf)
    surrogate.optimize()

    for _ in range(NUM_TRAIN_ROWS_BO):
        # Score every remaining candidate. A loan-amount penalty term was
        # sketched here at some point but is not part of the score.
        mu, sigma_sq = surrogate.predict(pool_X)
        score = mu + np.sqrt(sigma_sq)*kappa
        best = score.argmax()

        # Move the best-scoring row from the pool into the training set.
        picked_X = pool_X[best].reshape(1, -1)
        picked_y = pool_y[best].reshape(1, -1)
        chosen_X = np.concatenate((chosen_X, picked_X), axis=0)
        chosen_y = np.concatenate((chosen_y, picked_y), axis=0)
        pool_X = np.delete(pool_X, best, axis=0)
        pool_y = np.delete(pool_y, best, axis=0)

        # Refit the GP on the enlarged training set.
        surrogate.set_XY(X=chosen_X, Y=chosen_y)
        surrogate.optimize()

    sim_kwargs = dict(
        threshold=THRESHOLD,
        num_periods=NUM_PERIODS,
        fund_given=FUND_GIVEN,
        num_months=NUM_MONTHS,
        flow=FUND_FLOW,
        incoming_loans_per_time_period=LOANS_PER_MONTH,
        conf_quantile=(c*100, 100),
        optimize_for="TODO",
        version="loan_amount_and_variance",
        model_type="gp",
        seed=SEED,
    )
    perf = simulate_N_time_periods(
        surrogate, X_val, y_val, X_scaler, y_scaler, **sim_kwargs
    )

    per_run_total = np.sum(perf[:, :, 0], axis=1)
    return np.mean(per_run_total) / 1e5

In [18]:
from bayes_opt import BayesianOptimization as BayOpt

# Search space of the outer optimisation. The optimiser calls its target with
# keyword arguments named after these keys, so 'x' is handed to f as `c` and
# 'y' as `kappa`.
pbounds = {'x': [0.1, 0.5], 'y': [0.5, 10]}
bo = BayOpt(f=lambda x, y: f(c=x, kappa=y), pbounds=pbounds)

# Extra keyword arguments forwarded through maximize().
gp_params = dict([("alpha", 1e-5), ("n_restarts_optimizer", 2)])

bo.maximize(acq='ei', n_iter=50, **gp_params)

# Show the best point found, then the full evaluation history.
for result_key in ('max', 'all'):
    print(bo.res[result_key])

[31mInitialization[0m
[94m-----------------------------------------------------[0m
 Step |   Time |      Value |         x |         y | 
    1 | 00m03s | [35m   1.70422[0m | [32m   0.3944[0m | [32m   7.0065[0m | 
    2 | 00m03s |    1.14533 |    0.3680 |    3.4897 | 




    3 | 00m03s |    1.13689 |    0.2966 |    2.8129 | 
    4 | 00m03s |    1.53672 |    0.4807 |    6.6574 | 
    5 | 00m03s |    1.59981 |    0.4377 |    6.7497 | 
[31mBayesian Optimization[0m
[94m-----------------------------------------------------[0m
 Step |   Time |      Value |         x |         y | 
    6 | 00m11s |    1.48167 |    0.1000 |    7.5222 | 
    7 | 00m07s |    1.27784 |    0.5000 |    7.2130 | 
    8 | 00m06s | [35m   2.10813[0m | [32m   0.1000[0m | [32m   7.0184[0m | 
    9 | 00m06s |    1.53047 |    0.1000 |    5.8103 | 
   10 | 00m07s |    1.26235 |    0.5000 |    4.8927 | 
   11 | 00m08s |    1.67738 |    0.1000 |    6.8827 | 
   12 | 00m07s | [35m   2.31281[0m | [32m   0.1000[0m | [32m   7.1560[0m | 
   13 | 00m07s |    1.10165 |    0.5000 |    5.5739 | 
   14 | 00m07s | [35m   2.60513[0m | [32m   0.1000[0m | [32m   4.3261[0m | 
   15 | 00m07s |    2.01651 |    0.1000 |    4.5121 | 
   16 | 00m07s |    2.42774 |    0.1001 |    4.1520 | 




   26 | 00m04s |    1.96363 |    0.1000 |    4.0904 | 




   27 | 00m04s |    1.14630 |    0.2493 |    2.8617 | 
   28 | 00m04s | [35m   2.64541[0m | [32m   0.1029[0m | [32m   4.3253[0m | 
   29 | 00m11s |    2.02603 |    0.1657 |    6.2063 | 
   30 | 00m04s |    2.34861 |    0.1425 |    4.3191 | 
   31 | 00m11s |    2.07556 |    0.1492 |    6.0928 | 
   32 | 00m04s |    2.31053 |    0.1070 |    7.0977 | 
   33 | 00m10s |    2.42774 |    0.1001 |    4.1520 | 
   34 | 00m04s |    1.58883 |    0.1155 |    4.3278 | 
   35 | 00m04s |    1.81764 |    0.1192 |    9.1304 | 
   36 | 00m07s |    1.90816 |    0.2998 |    1.8569 | 
   37 | 00m04s |    2.08234 |    0.1018 |    4.3116 | 
   38 | 00m07s |    1.62254 |    0.2666 |    2.8472 | 
   39 | 00m07s |    2.37264 |    0.1429 |    7.1009 | 
   40 | 00m07s |    2.18671 |    0.1185 |    7.1128 | 
   41 | 00m04s |    2.22284 |    0.1075 |    4.1382 | 
   42 | 00m04s |    1.43537 |    0.4690 |    0.5514 | 
   43 | 00m08s |    1.83227 |    0.2760 |    4.0800 | 
   44 | 00m08s |    2.22629 |    0.144