In [1]:
import GPy
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from data import load_dataset, get_train_test_split
from interpretation import test_kernel_relevance

In [2]:
# Load the raw loan CSV through the project-local loader. It returns a pair;
# only `features` is used by the cells visible below.
filename = "dataset/LoanStats3a.csv"
features, data = load_dataset(filename)

# Report the size of the loaded feature table as a sanity check.
shape_text = str(features.shape)
print("Data shape: %s" % shape_text)

Preprocessing...
Feature Engineering...
Data shape: (42535, 40)


In [3]:
# Split the data with fixed seeds so the partitions are reproducible.
# `get_train_test_split` is project-local (presumably a train/test split with
# 30% held out -- confirm in data.py); sklearn's `train_test_split` then moves
# 20% of the remaining training rows into a validation set.
X_train, X_test, y_train, y_test = get_train_test_split(features, test_size=0.3, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# Temporarily use subset of data to debug faster
# TODO: Remove -- or set a cap to None to keep every row of that split.
# Each cap is named once so the X and y slices cannot drift out of sync.
MAX_TRAIN_ROWS = 1000
MAX_VAL_ROWS = 500
MAX_TEST_ROWS = 500
X_train, y_train = X_train[:MAX_TRAIN_ROWS, :], y_train[:MAX_TRAIN_ROWS]
X_val, y_val     = X_val[:MAX_VAL_ROWS, :], y_val[:MAX_VAL_ROWS]
X_test, y_test   = X_test[:MAX_TEST_ROWS, :], y_test[:MAX_TEST_ROWS]

# Normalize: the scaler is fitted on the training rows only, and the same
# fitted transform is then applied to the validation and test rows so no
# statistics from held-out data leak into preprocessing.
X_scaler = MinMaxScaler()
X_train = X_scaler.fit_transform(X_train)
X_val = X_scaler.transform(X_val)
X_test = X_scaler.transform(X_test)

print("X_train: %s, y_train: %s" % (str(X_train.shape), str(y_train.shape)))
print("X_val: %s, y_val: %s" % (str(X_val.shape), str(y_val.shape)))
print("X_test: %s, y_test: %s" % (str(X_test.shape), str(y_test.shape)))

X_train: (1000, 38), y_train: (1000,)
X_val: (500, 38), y_val: (500,)
X_test: (500, 38), y_test: (500,)


In [4]:
# Normalize the training targets. The scaler is fitted on y_train only;
# reshape(-1, 1) turns the 1-D target vector into the single-column 2-D array
# the scaler expects, so y_train_scaled is a column, not a flat vector.
y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1))

# Input dimension for kernels (number of feature columns after preprocessing)
input_dim = X_train.shape[1]

In [5]:
# RBF kernel over every input feature, with variance and lengthscale both
# starting at 1.0. No ARD flag is passed, and the recorded output shows a
# single lengthscale value.
kernel = GPy.kern.RBF(
    input_dim=input_dim,
    lengthscale=1.0,
    variance=1.0,
)

# Run the project-local relevance test. Only the training targets are
# pre-scaled; the validation/test targets go in unscaled together with both
# fitted scalers.
# NOTE(review): presumably the helper handles (un)scaling itself -- confirm in
# interpretation.py. The recorded run was interrupted manually, so expect this
# cell to be slow.
test_kernel_relevance(
    kernel,
    X_train, y_train_scaled,
    X_val, y_val,
    X_test, y_test,
    X_scaler, y_scaler,
    top_size=100,
    optimize_for="profits",
)

500
Mean: None
Kernel:   [1mrbf.       [0;0m  |           value  |  constraints  |  priors
  [1mvariance   [0;0m  |  0.545076212032  |      +ve      |        
  [1mlengthscale[0;0m  |   8.96438658709  |      +ve      |        
0
Mean: None
Kernel:   [1mrbf.       [0;0m  |           value  |  constraints  |  priors
  [1mvariance   [0;0m  |  0.704373568653  |      +ve      |        
  [1mlengthscale[0;0m  |   10.2211264312  |      +ve      |        
1
Mean: None
Kernel:   [1mrbf.       [0;0m  |          value  |  constraints  |  priors
  [1mvariance   [0;0m  |   0.6803990697  |      +ve      |        
  [1mlengthscale[0;0m  |  10.2393722736  |      +ve      |        
2
Mean: None
Kernel:   [1mrbf.       [0;0m  |           value  |  constraints  |  priors
  [1mvariance   [0;0m  |  0.522757187344  |      +ve      |        
  [1mlengthscale[0;0m  |   8.79833024457  |      +ve      |        
3
Mean: None
Kernel:   [1mrbf.       [0;0m  |           value  |  constrain

KeyboardInterrupt: 