In [1]:
import torch
from lvgp_pytorch.optim import noise_tune
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from lvgp_pytorch.models import LVGPR
from lvgp_pytorch.optim import fit_model_scipy
from lvgp_pytorch.utils.variables import CategoricalVariable
from lvgp_pytorch.utils.input_space import InputSpace
import timeit
%matplotlib inline
# Global figure defaults for every plot in this notebook.
plt.rcParams.update({
    'figure.dpi': 150,
    'font.family': 'serif',
})

In [2]:
# Load the dataset; the first CSV row holds the column names.
mit_data = pd.read_csv("D:/APAR-E/Data/mit_data.csv", sep=',', header=0)

# Inputs: the four composition columns, kept both as a DataFrame (x) and as a
# plain array (comps) that get_prop uses for row matching.
x = mit_data.loc[:, ["A", "M", "M'", "X"]]
comps = x.to_numpy()

# Target: the stability column, kept two-dimensional (one column) in both the
# DataFrame (y) and the array (props).
y = mit_data.loc[:, ["stability"]]
props = y.to_numpy()

# Configuration space: four categorical inputs whose levels are coded 1..k.
config = InputSpace()
A = CategoricalVariable(name="A", levels=np.linspace(1, 3, num=3))
M = CategoricalVariable(name="M", levels=np.linspace(1, 6, num=6))
M1 = CategoricalVariable(name="M1", levels=np.linspace(1, 5, num=5))
S = CategoricalVariable(name="S", levels=np.linspace(1, 3, num=3))
config.add_inputs([A, M, M1, S])


def set_seed(seed):
    """Seed Python's ``random``, PyTorch and NumPy's global RNG with ``seed``."""
    # Same order as before: stdlib, torch, numpy.
    for seeder in (random.seed, torch.manual_seed, np.random.seed):
        seeder(seed)


# Single seed shared by the RNGs above and by the train/test split.
SEED = 1926
set_seed(SEED)

def get_prop(x_in, comps_ref=None, props_ref=None):
    """Look up the recorded property value for each row of ``x_in``.

    Every query row is compared against the rows of the reference inputs; the
    property of the first exactly matching reference row is returned.

    Parameters
    ----------
    x_in : numpy.ndarray, shape (n_points, n_vars)
        Query points, one per row.
    comps_ref : array-like, shape (n_ref, n_vars), optional
        Reference inputs. Defaults to the module-level ``comps``.
    props_ref : array-like with n_ref entries, optional
        Reference property values, 1-D or a single column. Defaults to the
        module-level ``props``.

    Returns
    -------
    numpy.ndarray, shape (n_points,), dtype float64
        Property value for each query row.

    Raises
    ------
    IndexError
        If a query row has no exact match among the reference inputs.
    """
    ref_x = np.asarray(comps if comps_ref is None else comps_ref)
    # Flatten the (n_ref, 1) column so that indexing yields a true scalar;
    # storing a 1-element array into a scalar slot is deprecated in NumPy >= 1.25.
    ref_y = np.ravel(props if props_ref is None else props_ref)
    x_in = np.asarray(x_in)

    n_points = x_in.shape[0]
    y_in = np.zeros(n_points)
    for i in range(n_points):
        row = x_in[i, :]
        matches = np.flatnonzero(np.all(ref_x == row, axis=1))
        if matches.size == 0:
            raise IndexError(
                f"x_in row {i} ({row}) has no matching row in the reference inputs"
            )
        # First match wins when the reference table contains duplicated rows.
        y_in[i] = ref_y[matches[0]]
    return y_in


In [3]:
# Split inputs and targets in one call so each label stays aligned with its own
# row. Recovering labels afterwards by value lookup (get_prop) returns the first
# matching row, which mislabels points whenever two rows share the same inputs.
# The split indices depend only on the number of rows and random_state, so
# train_x / test_x are the same as when x.values is split on its own.
train_x, test_x, train_y, test_y = train_test_split(
    x.values,
    props.ravel().astype(float),  # 1-D float64 targets, the form get_prop returned
    train_size=0.7,
    test_size=0.3,
    random_state=SEED,
)


In [4]:
# Disabled reference run: the MATLAB-backed LVGP fit below is wrapped in a bare
# string literal, so none of it executes. Re-enabling it requires the
# LVGP_MATLAB_connector module. It expects the NumPy versions of train_x /
# train_y / test_x / test_y, i.e. it must run before they are converted to tensors.
# NOTE(review): the RRMSE / fit-time output recorded under this cell presumably
# comes from a run made before the code was disabled — confirm.
'''
from LVGP_MATLAB_connector import LVGP_MATLAB
start = timeit.default_timer()
model_m = LVGP_MATLAB()
model_m.fit(train_x, train_y[:,np.newaxis], ind_qual=config.qual_index)
test_mean, test_std = model_m.predict(test_x)
stop = timeit.default_timer()
rrmse = np.sqrt(np.mean((test_y[:,np.newaxis]-test_mean)**2))/np.std(test_y)
print('RRMSE: %5.3f'%rrmse.item())
print('Fit time: ', stop - start)
'''

RRMSE: 0.313
Fit time:  135.45243390000002


In [5]:
# Convert the NumPy splits to double-precision tensors. The categorical levels
# are declared as 1..k in the configuration space and are shifted to 0..k-1 here
# (presumably the coding LVGPR expects — confirm against the lvgp_pytorch docs).
# Each conversion is skipped when the variable is already a tensor, so
# re-running this cell neither subtracts 1 a second time nor re-wraps a tensor.
train_x = train_x if torch.is_tensor(train_x) else torch.tensor(train_x - 1).double()
test_x = test_x if torch.is_tensor(test_x) else torch.tensor(test_x - 1).double()
train_y = train_y if torch.is_tensor(train_y) else torch.tensor(train_y).double()
test_y = test_y if torch.is_tensor(test_y) else torch.tensor(test_y).double()

# Timed section: model construction, multi-start fitting and the switch to
# eval mode. Prediction happens after `stop`, so it is not part of "Fit time".
start = timeit.default_timer()
model = LVGPR(
    train_x=train_x,
    train_y=train_y,
    qual_index=config.qual_index,
    quant_index=config.quant_index,
    num_levels_per_var=list(config.num_levels.values()),
    noise=1,
    quant_correlation_class="RBFKernel",
    fix_noise=False
).double()

# nll_inc is reported again in the noise-tuning cell for comparison.
reslist, nll_inc = fit_model_scipy(
    model,
    num_restarts=49,
    options={'ftol':1e-6}
)

_ = model.eval()

stop = timeit.default_timer()

# Held-out accuracy: RMSE relative to the standard deviation of the test targets.
# NOTE(review): Tensor.std() applies Bessel's correction by default, whereas the
# disabled MATLAB cell normalises with np.std (population std), so the two RRMSE
# values differ by a factor of sqrt((n-1)/n).
with torch.no_grad():
    test_mean, test_std = model.predict(test_x, return_std=True)
rrmse = torch.mean((test_y-test_mean)**2).sqrt()/test_y.std()
print('RRMSE: %5.3f'%rrmse.item())
print('Fit time: ', stop - start)

RRMSE: 0.315
Fit time:  112.58912699999999


In [6]:
# Second LVGP model with the same constructor settings as `model`, fitted with
# the successive noise-tuning routine. The timer covers construction and tuning.
start = timeit.default_timer()

model2 = LVGPR(
    train_x=train_x,
    train_y=train_y,
    qual_index=config.qual_index,
    quant_index=config.quant_index,
    num_levels_per_var=[*config.num_levels.values()],
    quant_correlation_class="RBFKernel",
    noise=1,
    fix_noise=False,
)
model2 = model2.double()

# Optimize the noise successively; num_restarts is the number of starting
# points used at the largest noise variance.
nll_inc_tuned, opt_history = noise_tune(
    model2,
    num_restarts=19,
    options=dict(ftol=1e-8),
)

stop = timeit.default_timer()

# Compare against the NLL from the multi-start fit of the previous cell.
print('NLL obtained from multi-start optimization....: %6.2f' % nll_inc)
print('NLL obtained from noise tuning strategy.......: %6.2f' % nll_inc_tuned)

# Held-out accuracy: RMSE relative to the standard deviation of the test targets.
with torch.no_grad():
    test_mean, test_std = model2.predict(test_x, return_std=True)
rrmse = torch.sqrt(torch.mean((test_y - test_mean).pow(2))) / torch.std(test_y)
print('RRMSE: %5.3f' % rrmse.item())
print('Fit time: ', stop - start)

NLL obtained from multi-start optimization....:   0.42
NLL obtained from noise tuning strategy.......:   0.43
RRMSE: 0.314
Fit time:  27.402390000000025
