### Imports

In [22]:
import numpy as np
import pandas as pd
import xgboost as xgb
import seaborn as sns
from sklearn.metrics import mean_squared_error as mse_metric
from itertools import product

In [23]:
data_directory = '../data/experiment_small/'
data = np.load(data_directory + 'network_params/data_network.npy')
data = np.transpose(data, (1, 2, 0))
data.shape

(4, 2801, 2)

In [35]:
data_train = data[:,:-3*365,:]
data_val = data[:,-3*365:-365,:]
data_test = data[:,-365:,:]

### data slicing

In [36]:
def data_slice(data, h, f):
    T = h + f
    samples = np.array([data[:,t:t + T,:] for t in range(len(data[0]) - T + 1)])
    return samples[:, :, :h, :], samples[:, : , h:, :]

def get_mse(forecast, true_vals):
    N, f , _ = forecast.shape
    forecast = forecast.reshape(N, f * 2)
    true_vals = true_vals.reshape(N, f * 2)
    
    return mse_metric(forecast, true_vals)

### gradboost forecaster

In [37]:
def fit_gbst_forecaster(hist_samples, future_samples, params):
    m, N, h, _ = hist_samples.shape
    _, _, f, _ = future_samples.shape
    
    hist_sample_matrix = hist_samples.reshape(m * N, h * 2)
    future_sample_matrix = future_samples.reshape(m * N, f * 2)
    gbst = xgb.XGBRegressor(max_depth = params['max_depth'], min_child_weight = params['min_child_weight'],
                            learning_rate = params['learning_rate'])
    gbst.fit(hist_sample_matrix, future_sample_matrix)
    return gbst

def get_gbst_forecast(gbst_model, history, f):
    N, h, _ = history.shape
    history = history.reshape(N, 2 * h)

    forecast = gbst_model.predict(history)
    return forecast.reshape(N,f,2)

#### hyperparam tuning

In [38]:
def get_gbst_forecast_mse(gbst_model, hists, futures):
    _, _, f, _ = futures.shape
    forecasts = np.array([get_gbst_forecast(gbst_model, hist, f = f) for hist in hists])
    mses = np.array([get_mse(forecasts[i], futures[i]) for i in range(len(futures))])
    return mses

def gbst_val_exp(hist_train, future_train, hist_val, future_val, params):
    h, f = params['h'], params['f']
    model_params = params['model_params']
    gbst_model = fit_gbst_forecaster(hist_train, future_train, model_params)
    val_mses = get_gbst_forecast_mse(gbst_model, hist_val, future_val)
    return np.mean(val_mses)

def gbst_hyperparam_search(data_train, data_val, param_space):

    param_combinations = list(product(
        param_space["max_depth"],
        param_space["learning_rate"],
        param_space["min_child_weight"],
        param_space["h"],
        param_space["f"]
    ))

    num_comb = len(param_combinations)

    best_params = None
    best_score = float("inf")
    for i, combination in enumerate(param_combinations):
        model_params = {
            "max_depth": combination[0],
            "learning_rate": combination[1],
            "min_child_weight": combination[2]
        }
        h, f =  combination[3], combination[4]
        hist_train, future_train = data_slice(data_train, h = h, f = f)
        hist_val, future_val = data_slice(data_val, h = h, f = f)

        params = {'model_params': model_params, 'h':h, 'f':f}
        mse = gbst_val_exp(hist_train, future_train, hist_val, future_val, params)
        if (i+1) % 5 == 0:
            print(f'progress: {i+1}/{num_comb}')
        if mse < best_score:
            best_params = params
            best_score = mse
 
    return best_score, best_params

In [54]:
## found via co-ordinate wise descent. good, but not guaranteed optimal
param_grid = {
    "max_depth": [5, 15, 20],
    "learning_rate": [0.07],
    "min_child_weight": [0],
    "h": np.arange(5,10),
    "f": [1], 
}

In [55]:
best_mse, best_params = gbst_hyperparam_search(data_train, data_val, param_grid)

progress: 5/15
progress: 10/15
progress: 15/15


In [56]:
print(best_mse)
best_params

72.70115763406756


{'model_params': {'max_depth': 15,
  'learning_rate': 0.07,
  'min_child_weight': 0},
 'h': 6,
 'f': 1}

### single example

In [10]:
h, f = 7, 3
hist_train, future_train = data_slice(data_train, h = h, f = f)
hist_val, future_val = data_slice(data_val, h = h, f = f)
hist_test, future_test = data_slice(data_test, h = h, f = f)

In [13]:
h, f = 20, 3
model_params = {'max_depth':6, 'min_child_weight':1, 'learning_rate':1}
params = {'f':f, 'h':h, 'model_params':model_params}
mse = gbst_val_exp(hist_train, future_train, hist_val, future_val, params)
print(mse)

42.122551908979176
