In [1]:
from gp import *
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [2]:
import importlib
import gp
importlib.reload(gp)
from gp import *
import matplotlib.pyplot as plt


# Dataset characteristics<br>
- instant: record index<br>
- dteday : date
- season : season (1: spring, 2: summer, 3: fall, 4: winter)
- yr : year (0: 2011, 1:2012)
- mnth : month (1 to 12)
- hr : hour (0 to 23)
- holiday : whether day is holiday or not (extracted from http://dchr.dc.gov/page/holiday-schedule)
- weekday : day of the week
- workingday : 1 if the day is neither a weekend nor a holiday, otherwise 0.
- weathersit : 
    - 1: Clear, Few clouds, Partly cloudy
    - 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
    - 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
    - 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog
- temp : Normalized temperature in Celsius. The values are divided by 41 (max)
- atemp: Normalized feeling temperature in Celsius. The values are divided by 50 (max)
- hum: Normalized humidity. The values are divided by 100 (max)
- windspeed: Normalized wind speed. The values are divided by 67 (max)
- casual: count of casual users
- registered: count of registered users
- cnt: count of total rental bikes including both casual and registered

In [3]:
import pandas as pd

# Load the hourly bike-sharing records; expects hour.csv in the working directory.
df = pd.read_csv("hour.csv")
df

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17374,17375,2012-12-31,1,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,11,108,119
17375,17376,2012-12-31,1,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,8,81,89
17376,17377,2012-12-31,1,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,7,83,90
17377,17378,2012-12-31,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,13,48,61


# Data Preprocess

In [4]:
# Encode the date column as the number of days elapsed since the first record
# so that every column is numeric.
reference_date = pd.Timestamp('2011-01-01')
# Build a new frame instead of overwriting df in place: re-running this cell on
# an already-converted column would make pd.to_datetime reinterpret the integer
# day offsets and silently corrupt the feature.
df_numeric = df.assign(
    dteday=(pd.to_datetime(df["dteday"]) - reference_date).dt.days
)
data = df_numeric.to_numpy(dtype=float)
data

array([[1.0000e+00, 0.0000e+00, 1.0000e+00, ..., 3.0000e+00, 1.3000e+01,
        1.6000e+01],
       [2.0000e+00, 0.0000e+00, 1.0000e+00, ..., 8.0000e+00, 3.2000e+01,
        4.0000e+01],
       [3.0000e+00, 0.0000e+00, 1.0000e+00, ..., 5.0000e+00, 2.7000e+01,
        3.2000e+01],
       ...,
       [1.7377e+04, 7.3000e+02, 1.0000e+00, ..., 7.0000e+00, 8.3000e+01,
        9.0000e+01],
       [1.7378e+04, 7.3000e+02, 1.0000e+00, ..., 1.3000e+01, 4.8000e+01,
        6.1000e+01],
       [1.7379e+04, 7.3000e+02, 1.0000e+00, ..., 1.2000e+01, 3.7000e+01,
        4.9000e+01]])

In [5]:
# Columns 1..13 (dteday through windspeed) are the features; column 0 (instant)
# is only a record index. casual (14) and registered (15) are left out because
# together they make up the label cnt (16).
X_whole = data[:, 1:14] # the first column is index, not necessary
Y_whole = data[:, 16] # count of total rental bikes as label
X_whole 

array([[0.000e+00, 1.000e+00, 0.000e+00, ..., 2.879e-01, 8.100e-01,
        0.000e+00],
       [0.000e+00, 1.000e+00, 0.000e+00, ..., 2.727e-01, 8.000e-01,
        0.000e+00],
       [0.000e+00, 1.000e+00, 0.000e+00, ..., 2.727e-01, 8.000e-01,
        0.000e+00],
       ...,
       [7.300e+02, 1.000e+00, 1.000e+00, ..., 2.576e-01, 6.000e-01,
        1.642e-01],
       [7.300e+02, 1.000e+00, 1.000e+00, ..., 2.727e-01, 5.600e-01,
        1.343e-01],
       [7.300e+02, 1.000e+00, 1.000e+00, ..., 2.727e-01, 6.500e-01,
        1.343e-01]])

In [6]:
def standardize(X):
    """Standardize ``X`` to zero mean and unit variance along axis 0.

    Args:
        X: Array with samples along axis 0 (1-D labels or 2-D features).

    Returns:
        Tuple ``(X_standardized, mean, std)``. ``std`` is the scale that was
        actually applied: entries whose standard deviation is exactly zero are
        replaced by 1, so constant columns map to 0 instead of NaN.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant column has std == 0; dividing by it would yield 0/0 = NaN.
    std = np.where(std == 0, 1.0, std)
    return (X - mean) / std, mean, std

def split_train_test(X_whole, Y_whole, num=1500, rate=0.7):
    """Draw a random subset of ``num`` samples and split it into train/test.

    Features and labels are standardized with statistics computed on the
    training portion only, so nothing about the test portion leaks into the
    preprocessing.

    Args:
        X_whole: Feature array with samples along axis 0.
        Y_whole: Label array aligned with ``X_whole``.
        num: Number of samples to keep from the whole dataset.
        rate: Fraction of the ``num`` samples assigned to the training set.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)``, all standardized.

    Raises:
        ValueError: If ``num`` exceeds the number of available samples.
    """
    n_total = X_whole.shape[0]
    if num > n_total:
        raise ValueError(f"num={num} exceeds the {n_total} available samples")

    # Shuffle the whole dataset, then keep a random ordering of its first
    # `num` rows (two PRNG draws, same as before).
    order = jax.random.permutation(grab_prng(), n_total)
    subset = jax.random.permutation(grab_prng(), num)
    X_sub = X_whole[order][subset]
    Y_sub = Y_whole[order][subset]

    # The subset is already in random order, so a contiguous cut is a random split.
    split_idx = int(num * rate)
    X_train_raw, X_test_raw = X_sub[:split_idx], X_sub[split_idx:]
    Y_train_raw, Y_test_raw = Y_sub[:split_idx], Y_sub[split_idx:]

    def _fit_scaler(values):
        # Zero std (constant column) is replaced by 1 to avoid 0/0 = NaN.
        mean = np.mean(values, axis=0)
        std = np.std(values, axis=0)
        return mean, np.where(std == 0, 1.0, std)

    # Fit the scaling on the training rows only; fall back to the whole subset
    # when the training portion is empty so the statistics stay defined.
    has_train = split_idx > 0
    x_mean, x_std = _fit_scaler(X_train_raw if has_train else X_sub)
    y_mean, y_std = _fit_scaler(Y_train_raw if has_train else Y_sub)

    X_train = (X_train_raw - x_mean) / x_std
    X_test = (X_test_raw - x_mean) / x_std
    Y_train = (Y_train_raw - y_mean) / y_std
    Y_test = (Y_test_raw - y_mean) / y_std

    return X_train, Y_train, X_test, Y_test


In [7]:
def split_clml(X_train, Y_train, ratio=0.8):
    """Randomly partition the training set into two parts.

    The rows are shuffled; the first ``int(n * ratio)`` shuffled rows form the
    first part and the remaining rows form the second.

    Returns:
        ``(X_con, Y_con)`` where each is a two-element list
        ``[first_part, second_part]``.
    """
    perm = jax.random.permutation(grab_prng(), X_train.shape[0])
    X_perm, Y_perm = X_train[perm], Y_train[perm]

    # Position of the cut between the two parts.
    cut = int(X_train.shape[0] * ratio)

    X_con = [X_perm[:cut], X_perm[cut:]]
    Y_con = [Y_perm[:cut], Y_perm[cut:]]
    return X_con, Y_con

def split_sequence(X_train, Y_train, seq_len=8):
    """Shuffle the training set and cut it into ``seq_len`` consecutive chunks.

    Every chunk holds ``n // seq_len`` rows, except the first one, which also
    receives the ``n % seq_len`` leftover rows.

    Returns:
        ``(X_seq, Y_seq)``: two lists of ``seq_len`` arrays each.
    """
    total = X_train.shape[0]
    perm = jax.random.permutation(grab_prng(), total)
    X_perm, Y_perm = X_train[perm], Y_train[perm]

    base, extra = divmod(total, seq_len)

    # Cumulative chunk boundaries; only the first chunk absorbs the remainder.
    bounds = [0]
    for k in range(seq_len):
        bounds.append(bounds[-1] + base + (extra if k == 0 else 0))

    spans = list(zip(bounds, bounds[1:]))
    X_seq = [X_perm[lo:hi] for lo, hi in spans]
    Y_seq = [Y_perm[lo:hi] for lo, hi in spans]
    return X_seq, Y_seq

# Plotting code

In [8]:
def scatterplot(lml_lpd, label_x, label_y, tit):
    """Scatter the first column of ``lml_lpd`` against its second column.

    Args:
        lml_lpd: 2-D array; column 0 is plotted on x, column 1 on y.
        label_x: x-axis label.
        label_y: y-axis label.
        tit: Figure title.
    """
    plt.scatter(lml_lpd[:, 0], lml_lpd[:, 1])
    plt.xlabel(label_x)
    plt.ylabel(label_y)
    plt.title(tit)
    # No legend: the single series carries no label, so plt.legend() would only
    # emit a "no artists with labels" warning and draw nothing.
    plt.show()
#

def plot_comparision(lml, clml, clmls):
    """Overlay three per-setting score series in one scatter plot.

    Each series is plotted against its own positional index, with a distinct
    colour and marker, and labelled LML, CLML and CLMLS respectively.
    """
    series = (
        (lml, "LML", "red", "o"),
        (clml, "CLML", "blue", "x"),
        (clmls, "CLMLS", "green", "s"),
    )
    for values, name, colour, shape in series:
        plt.scatter(range(len(values)), values, label=name, color=colour, marker=shape)

    plt.xlabel("Hyperparameter Setting Index")
    plt.ylabel("NLL")
    plt.title("Comparison Between Different Generalization Metrics")
    plt.legend()
    plt.show()
#

# Experiment 2 - feature importance using learned Mahalanobis distances

In [9]:
def random_init_sqexp_maha_hyperparams(X_train):
    """Sample a random initial hyperparameter vector.

    The vector is two values drawn uniformly from [0.2, 2) followed by one
    value per column of ``X_train`` drawn uniformly from [1, 10).
    NOTE(review): presumably two variances followed by per-feature length
    scales -- confirm against the covariance function in gp.
    """
    pieces = [
        jax.random.uniform(grab_prng(), shape=(2,), minval=0.2, maxval=2),
        jax.random.uniform(grab_prng(), shape=(X_train.shape[1],), minval=1, maxval=10),
    ]
    return np.concatenate(pieces)

def _test_nlpd(hyperparams):
    """Return the negative log predictive density on the test set.

    Conditions a GP on the notebook-level ``X_train``/``Y_train`` with the
    given hyperparameters and scores its predictions at ``X_star`` against
    ``Y_test``.
    """
    posterior_predictive = gp_posterior(cov_func, X_train, Y_train, hyperparams)
    posterior_mean, posterior_var = posterior_predictive(X_star)
    # hyperparams[0] is forwarded as the last argument; presumably the noise
    # variance -- TODO confirm against gp.neg_log_predictive_density.
    return neg_log_predictive_density(Y_test, posterior_mean, posterior_var, hyperparams[0])


def _run_generalization_trials(optimize, X_opt, Y_opt, hyperparams_inits, T, step_size):
    """Optimize one objective from every initial setting and score the result.

    Args:
        optimize: Callable invoked as
            ``optimize(cov_func, X_opt, Y_opt, unconstrained_init, step_size, T)``
            returning ``(unconstrained_hyperparams, objective_value)``.
        X_opt, Y_opt: Data handed to ``optimize``.
        hyperparams_inits: Sequence of initial (constrained) hyperparameters.
        T: Forwarded to ``optimize`` as its last argument.
        step_size: Forwarded to ``optimize``.

    Returns:
        ``(objective_nlpd, opt_hyperparams)``: an array with one row
        ``[objective_value, final_test_nlpd]`` per trial, and the list of
        optimized (constrained) hyperparameters.
    """
    trials = []
    opt_hyperparams = []
    # Iterate over the settings actually passed in rather than a global trial
    # count, so the function no longer depends on hidden notebook state.
    for hyperparams_init in hyperparams_inits:
        unconstrained_init = inverse_param_transform(hyperparams_init)
        unconstrained_opt, objective = optimize(
            cov_func, X_opt, Y_opt, unconstrained_init, step_size, T
        )
        hyperparams = param_transform(unconstrained_opt)

        neg_log_initial = _test_nlpd(hyperparams_init)
        neg_log_final = _test_nlpd(hyperparams)

        row = np.array([neg_log_initial, neg_log_final, objective])
        print(row)
        trials.append(row)
        opt_hyperparams.append(hyperparams)

    if not trials:
        # Nothing to stack; return an empty (0, 2) result instead of failing
        # on the column indexing below.
        return np.zeros((0, 2)), opt_hyperparams

    trials_array = np.array(trials)
    objective_nlpd = np.column_stack((trials_array[:, 2], trials_array[:, 1]))
    return objective_nlpd, opt_hyperparams


def lml_exp(hyperparams_inits, T=200, step_size=1e-4):
    """Run ``empirical_bayes`` on the full training set from each initial setting.

    Returns ``(lml_nlpd, opt_hyperparams)``; each row of ``lml_nlpd`` is
    ``[objective value returned by empirical_bayes, final test NLPD]``.
    """
    return _run_generalization_trials(
        empirical_bayes, X_train, Y_train, hyperparams_inits, T, step_size
    )


def clml_exp(hyperparams_inits, T=200, step_size=1e-3):
    """Run ``clml_opt`` on the ``X_con``/``Y_con`` split from each initial setting.

    Returns ``(clml_nlpd, opt_hyperparams)``; each row of ``clml_nlpd`` is
    ``[objective value returned by clml_opt, final test NLPD]``.
    """
    return _run_generalization_trials(
        clml_opt, X_con, Y_con, hyperparams_inits, T, step_size
    )


def clmls_exp(hyperparams_inits, T=200, step_size=5e-3):
    """Run ``clmls_opt`` on the ``X_seq``/``Y_seq`` chunks from each initial setting.

    Returns ``(clmls_nlpd, opt_hyperparams)``; each row of ``clmls_nlpd`` is
    ``[objective value returned by clmls_opt, final test NLPD]``.
    """
    return _run_generalization_trials(
        clmls_opt, X_seq, Y_seq, hyperparams_inits, T, step_size
    )


In [None]:
# experiment settings: 3000 samples, 70% of them used for training

n_trials = 30
cov_func = sqexp_mahalanobis_cov_function

X_train, Y_train, X_test, Y_test = split_train_test(X_whole, Y_whole, num=3000, rate=0.7)
X_con, Y_con = split_clml(X_train, Y_train, ratio=0.8)
X_seq, Y_seq = split_sequence(X_train, Y_train, seq_len=10)
X_star = X_test

# One random initial hyperparameter vector per trial, shared by all objectives.
hyperparams_inits = [
    random_init_sqexp_maha_hyperparams(X_train) for _ in range(n_trials)
]


In [None]:
# Optimize each objective (LML, CLML, CLMLS) from the same initial settings with T=200,
# scatter the objective value against the final test NLPD, then compare the three
# final-NLPD series side by side.
lml_nlpd, lml_opt_hyperparams = lml_exp(hyperparams_inits, T=200, step_size=1e-4)
scatterplot(lml_nlpd, 'LML', 'NLPD', 'Generalization of LML')
clml_nlpd, clml_opt_hyperparams = clml_exp(hyperparams_inits, T=200, step_size=1e-3)
scatterplot(clml_nlpd, 'CLML', 'NLPD', 'Generalization of CLML')
clmls_nlpd, clmls_opt_hyperparams = clmls_exp(hyperparams_inits, T=200, step_size=5e-3)
scatterplot(clmls_nlpd, 'CLMLS', 'NLPD', 'Generalization of CLMLS')
plot_comparision(lml_nlpd[:, 1], clml_nlpd[:, 1], clmls_nlpd[:, 1])

In [None]:
# Same three-objective comparison with T=100 and step sizes 1e-3 / 5e-3 / 8e-3.
# Note: this overwrites the result variables of any earlier run.
lml_nlpd, lml_opt_hyperparams = lml_exp(hyperparams_inits, T=100, step_size=1e-3)
scatterplot(lml_nlpd, 'LML', 'NLPD', 'Generalization of LML')
clml_nlpd, clml_opt_hyperparams = clml_exp(hyperparams_inits, T=100, step_size=5e-3)
scatterplot(clml_nlpd, 'CLML', 'NLPD', 'Generalization of CLML')
clmls_nlpd, clmls_opt_hyperparams = clmls_exp(hyperparams_inits, T=100, step_size=8e-3)
scatterplot(clmls_nlpd, 'CLMLS', 'NLPD', 'Generalization of CLMLS')
plot_comparision(lml_nlpd[:, 1], clml_nlpd[:, 1], clmls_nlpd[:, 1])

In [None]:
# Same three-objective comparison with T=200 and step sizes 1e-3 / 5e-3 / 8e-3.
# Note: this overwrites the result variables of any earlier run.
lml_nlpd, lml_opt_hyperparams = lml_exp(hyperparams_inits, T=200, step_size=1e-3)
scatterplot(lml_nlpd, 'LML', 'NLPD', 'Generalization of LML')
clml_nlpd, clml_opt_hyperparams = clml_exp(hyperparams_inits, T=200, step_size=5e-3)
scatterplot(clml_nlpd, 'CLML', 'NLPD', 'Generalization of CLML')
clmls_nlpd, clmls_opt_hyperparams = clmls_exp(hyperparams_inits, T=200, step_size=8e-3)
scatterplot(clmls_nlpd, 'CLMLS', 'NLPD', 'Generalization of CLMLS')
plot_comparision(lml_nlpd[:, 1], clml_nlpd[:, 1], clmls_nlpd[:, 1])

In [None]:
import importlib
import gp
importlib.reload(gp)
from gp import *
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [None]:
# Evaluate the CLMLS objective at the first initial hyperparameter setting.
unconstrained_hyperparams=inverse_param_transform(hyperparams_inits[0])

# NOTE(review): whole_sequence=True presumably makes the function return one value
# per sequence step rather than a single scalar -- confirm in gp.py.
clmls_function = conditional_log_marginal_likelihood_sequence(cov_func, X_seq, Y_seq, whole_sequence=True)
clmls_vals=clmls_function(unconstrained_hyperparams)
clmls_vals


In [None]:
# Plot the CLMLS values against their position together with the straight line
# a + b * x built from the two values returned by opt_clmls.
# NOTE(review): opt_clmls presumably fits that line to clmls_vals -- confirm in gp.py.
a,b =opt_clmls(clmls_vals)
print(a,b)
x_data = np.arange(len(clmls_vals))
y_data = np.array(clmls_vals)
x_fit = np.linspace(min(x_data), max(x_data), 100)  # Smooth x range for the fitted curve
y_fit = a + b * x_fit

# Plot the data
plt.scatter(x_data, y_data, color='blue', label='Sequence', zorder=5)

# Plot the fitted model
plt.plot(x_fit, y_fit, color='red', label='Regression Model', zorder=4)

# Add labels, legend, and title
plt.xlabel('CLMLS index')
plt.ylabel('CLMLS value')
plt.title('CLMLS fitting')
plt.legend()


In [10]:
# experiment settings: 3000 samples, 91% of them used for training

n_trials = 30
cov_func = sqexp_mahalanobis_cov_function

X_train, Y_train, X_test, Y_test = split_train_test(X_whole, Y_whole, num=3000, rate=0.91)
X_con, Y_con = split_clml(X_train, Y_train, ratio=0.8)
X_seq, Y_seq = split_sequence(X_train, Y_train, seq_len=10)
X_star = X_test

# One random initial hyperparameter vector per trial, shared by all objectives.
hyperparams_inits = [
    random_init_sqexp_maha_hyperparams(X_train) for _ in range(n_trials)
]


In [None]:
# Optimize each objective (LML, CLML, CLMLS) from the same initial settings with T=200
# and step sizes 1e-4 / 8e-4 / 10e-4, scatter the objective value against the final
# test NLPD, then compare the three final-NLPD series side by side.
lml_nlpd, lml_opt_hyperparams = lml_exp(hyperparams_inits, T=200, step_size=1e-4)
scatterplot(lml_nlpd, 'LML', 'NLPD', 'Generalization of LML')
clml_nlpd, clml_opt_hyperparams = clml_exp(hyperparams_inits, T=200, step_size=8e-4)
scatterplot(clml_nlpd, 'CLML', 'NLPD', 'Generalization of CLML')
clmls_nlpd, clmls_opt_hyperparams = clmls_exp(hyperparams_inits, T=200, step_size=10e-4)
scatterplot(clmls_nlpd, 'CLMLS', 'NLPD', 'Generalization of CLMLS')
plot_comparision(lml_nlpd[:, 1], clml_nlpd[:, 1], clmls_nlpd[:, 1])

In [None]:
# Same three-objective comparison with T=100 and step sizes 1e-3 / 8e-3 / 10e-3.
# Note: this overwrites the result variables of any earlier run.
lml_nlpd, lml_opt_hyperparams = lml_exp(hyperparams_inits, T=100, step_size=1e-3)
scatterplot(lml_nlpd, 'LML', 'NLPD', 'Generalization of LML')
clml_nlpd, clml_opt_hyperparams = clml_exp(hyperparams_inits, T=100, step_size=8e-3)
scatterplot(clml_nlpd, 'CLML', 'NLPD', 'Generalization of CLML')
clmls_nlpd, clmls_opt_hyperparams = clmls_exp(hyperparams_inits, T=100, step_size=10e-3)
scatterplot(clmls_nlpd, 'CLMLS', 'NLPD', 'Generalization of CLMLS')
plot_comparision(lml_nlpd[:, 1], clml_nlpd[:, 1], clmls_nlpd[:, 1])

In [None]:
# Same three-objective comparison with T=200 and step sizes 1e-3 / 8e-3 / 10e-3.
# Note: this overwrites the result variables of any earlier run.
lml_nlpd, lml_opt_hyperparams = lml_exp(hyperparams_inits, T=200, step_size=1e-3)
scatterplot(lml_nlpd, 'LML', 'NLPD', 'Generalization of LML')
clml_nlpd, clml_opt_hyperparams = clml_exp(hyperparams_inits, T=200, step_size=8e-3)
scatterplot(clml_nlpd, 'CLML', 'NLPD', 'Generalization of CLML')
clmls_nlpd, clmls_opt_hyperparams = clmls_exp(hyperparams_inits, T=200, step_size=10e-3)
scatterplot(clmls_nlpd, 'CLMLS', 'NLPD', 'Generalization of CLMLS')
plot_comparision(lml_nlpd[:, 1], clml_nlpd[:, 1], clmls_nlpd[:, 1])

0 -310.3557588441205
1 -252.3113385937163
2 -213.11693923914464
3 -160.4400299045233


KeyboardInterrupt: 