In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from data import load_dataset, get_train_test_split
# from optimization import gp_optimize_threshold
# from evaluation import train_and_test_other_models

In [None]:
# Load the loan-statistics CSV. `load_dataset` hands back two objects; only
# `features` is consumed by the cells visible further down.
filename = "dataset/LoanStats3a.csv"
features, data = load_dataset(filename, encoding='utf-8')
# One-element tuple so `%s` formats the shape itself (same text as str(shape)).
print("Data shape: %s" % (features.shape,))

In [None]:
"""
EXPERIMENT 1: GP VS BAYES OPT IN LATER STAGES
-------------------------------------------------------
Set up:
1. Take the first NUM_TRAIN_ROWS rows of the training split as the training set
   (the original plan called for 100-500 rows; this cell currently uses 1000).
2. Create a GP model and train it with the training set.
3. Let K = 100-200.
Experiment:
1. Let GP_model do prediction and self-updating using loan_amount_and_variance version for K more steps. Then pure prediction.
2. Let GP_model do prediction and self-updating using Bayesian optimization for K more steps. Then pure prediction.
3. [OPTIONAL] Let SVM do prediction after training with training set to compare.
Results:
1. Compare the 3 profits gained at the end of all periods.
"""

# Number of rows kept for fitting the scalers (and, downstream, the GP).
# Previously a bare `1000` in the slice below.
NUM_TRAIN_ROWS = 1000

# 70/30 split; X_test / y_test are what the simulation cells consume.
X_train, X_test, y_train, y_test = get_train_test_split(features, test_size=0.3, random_state=0)
# train_size=0.2: only 20% of the remaining rows stay in the training split,
# the other 80% become the validation split.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, train_size=0.2, random_state=0)

X_train, y_train = X_train[:NUM_TRAIN_ROWS, :], y_train[:NUM_TRAIN_ROWS]

# Normalize: both scalers are fitted on the (truncated) training rows only and
# then applied unchanged to the validation and test splits.
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_val_scaled = X_scaler.transform(X_val)
X_test_scaled = X_scaler.transform(X_test)

# MinMaxScaler expects 2-D input, hence the reshape of the targets to a column.
y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1))
y_val_scaled = y_scaler.transform(y_val.reshape(-1, 1))
y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1))

print("X_train: %s, y_train: %s" % (str(X_train.shape), str(y_train.shape)))
print("X_val: %s, y_val: %s" % (str(X_val.shape), str(y_val.shape)))
print("X_test: %s, y_test: %s" % (str(X_test.shape), str(y_test.shape)))

In [None]:
# Gaussian Process
import GPy

# Initialize GP Model
# Fit on the min-max scaled features so inputs and targets live in the same
# normalised space. The earlier version passed the raw `X_train` here even
# though `y_train_scaled` was used for the targets and `X_scaler` is handed to
# the simulation.
# NOTE(review): this assumes `simulate_N_time_periods` transforms its inputs
# with `X_scaler` before calling the model -- confirm against simulation.py.
kernel = GPy.kern.RBF(input_dim=X_train_scaled.shape[1], variance=1., lengthscale=1.)
gp_model = GPy.models.GPRegression(X_train_scaled, y_train_scaled, kernel)
# Tune the kernel / noise hyperparameters on the training rows.
gp_model.optimize()

In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

SEED            = 1
THRESHOLD       = 1.1
NUM_PERIODS     = 5
NUM_MONTHS      = 300
FUND_GIVEN      = 0
FUND_PER_MONTH  = 1e5
LOANS_PER_MONTH = 500
CONF_QUANTILE   = (40,100)
NUM_UPDATE_ROWS = 100
from simulation import *

perf_gp = simulate_N_time_periods(
    gp_model.copy(),
    X_test, y_test,
    X_scaler, y_scaler,
    threshold=THRESHOLD,
    num_periods=NUM_PERIODS,
    fund_given=FUND_GIVEN,
    num_months=NUM_MONTHS,
    incoming_fund_per_month=FUND_PER_MONTH,
    incoming_loans_per_time_period=LOANS_PER_MONTH,
    conf_quantile=CONF_QUANTILE,
    optimize_for="TODO", 
    version="self_updating_gp",
    gp_update_steps=NUM_UPDATE_ROWS,
    model_type="gp",
    seed=SEED
)
print("Profits for self-updating GP:")
print(np.mean(np.sum(perf_gp[:,:,0], axis=1)))

In [None]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

SEED            = 1
THRESHOLD       = 1.1
NUM_PERIODS     = 5
NUM_MONTHS      = 300
FUND_GIVEN      = 0
FUND_PER_MONTH  = 1e5
LOANS_PER_MONTH = 500
CONF_QUANTILE   = (40,100)
NUM_UPDATE_ROWS = 100

from simulation import *

from scipy.stats import norm

kappa = 3
perf_bayes_opt = simulate_N_time_periods(
    gp_model.copy(),
    X_test, y_test,
    X_scaler, y_scaler,
    threshold=THRESHOLD,
    num_periods=NUM_PERIODS,
    fund_given=FUND_GIVEN, 
    num_months=NUM_MONTHS,
    incoming_fund_per_month=FUND_PER_MONTH,
    incoming_loans_per_time_period=LOANS_PER_MONTH,
    conf_quantile=CONF_QUANTILE,
    optimize_for="TODO",
    version="bayesian_optimization",
    kappa=kappa,
    bay_opt_steps=NUM_UPDATE_ROWS,
    model_type="gp", seed=SEED
)
print("Profits for Bayesian Optimization:")
print(np.mean(np.sum(perf_bayes_opt[:,:,0], axis=1)))

In [None]:
import os

from visualisation import plot_portfolio_performance, plot_portfolio_performance_comparisons

# Side-by-side summary of the two runs: channel 0 of each result array,
# summed over axis 1 and averaged over axis 0.
print("Mean Total Profits:")
print(np.mean(np.sum(perf_gp[:,:,0], axis=1)))
print(np.mean(np.sum(perf_bayes_opt[:,:,0], axis=1)))

# Saving performances to pickle file and Loading performances from it
# Make sure the output directory exists first, so a fresh checkout does not
# fail at the save step after the (long) simulations have finished.
results_dir = "results"
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

# NOTE: `filename` was also used for the dataset path in the loading cell;
# after this cell runs it refers to the results file instead.
filename = os.path.join(results_dir, "perf_bayeopt_vs_gp_1")
meta_info = ["SelfUpd GP", "BayOpt GP"]
performances = [perf_gp, perf_bayes_opt]
# store_/load_performance_results are not imported explicitly in this
# notebook; presumably they arrive via `from simulation import *` -- confirm.
store_performance_results(performances, meta_info, filename)

# Round-trip through the stored file so the plot reflects what was saved.
meta_info, loaded_perf = load_performance_results(filename)
print(meta_info)
plot_portfolio_performance_comparisons(loaded_perf, legend_names=meta_info)