In [1]:
import numpy as np
import pandas as pd
import GPy

from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessClassifier

from sklearn.model_selection import train_test_split



In [2]:
import itertools

# Column names are built as <letter><index> for the letters 'HKSE' and the
# indices 0-8 (36 columns, presumably one per card -- confirm against the
# data description), followed by the three trailing columns.
datafile = 'data/jass/rnd_01.csv'
card_cols = [farbe + str(bild) for farbe, bild in itertools.product('HKSE', range(9))]
col_names = card_cols + ["Geschoben", "Player", "Aktion"]

# header=None: the names above are assigned to the columns explicitly.
data = pd.read_csv(datafile, names=col_names, header=None)
data

Unnamed: 0,H0,H1,H2,H3,H4,H5,H6,H7,H8,K0,...,E2,E3,E4,E5,E6,E7,E8,Geschoben,Player,Aktion
0,0,0,1,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,1,1631,2
1,0,0,1,0,1,1,0,0,0,0,...,0,0,0,0,0,1,0,0,64310,6
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,1,0,16721,1
3,1,0,0,0,1,0,0,0,1,1,...,0,0,1,0,1,0,0,1,0,4
4,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,72620,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161689,0,1,0,0,0,1,0,0,0,0,...,0,1,0,1,1,0,0,1,55942,3
161690,0,0,1,0,0,0,0,1,0,1,...,0,1,0,0,1,0,0,0,0,6
161691,0,0,0,0,0,0,1,0,0,0,...,0,0,1,1,0,1,0,0,0,3
161692,0,1,0,1,0,0,0,0,1,1,...,1,0,0,0,0,0,1,0,60659,6


## Binary Classification on targets 0 and 1

In [3]:
# Keep only the rows whose action is one of the two target classes and
# drop the 'Player' column.
target_classes = [0,1]
is_target_row = data['Aktion'].isin(target_classes)
zeroone = data[is_target_row].drop(columns='Player')

# Features: every column from 'H0' through 'Geschoben' (label slice, inclusive).
X = zeroone.loc[:, 'H0':'Geschoben'].values

# Targets must be binary (required by GPC implementation of sklearn):
# True where 'Aktion' is the first target class, False for the second.
positive_class = target_classes[0:1]
y = zeroone.loc[:, 'Aktion'].isin(positive_class).values

In [51]:
# Small experiment: 50 training rows, 10000 test rows; the fixed
# random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=50,
    test_size=10000,
    random_state=23,
)
n_train, n_test = len(y_train), len(y_test)
print('Train: %i, Test: %i' % (n_train, n_test))

Train: 50, Test: 10000


### With GPy (uses EP approx)

In [55]:
%%time
m = GPy.models.GPClassification(X_train,y_train.reshape(-1,1))

CPU times: user 60 ms, sys: 70 ms, total: 130 ms
Wall time: 179 ms


In [56]:
%%time
for i in range(3):
    m.optimize('bfgs', max_iters=100)

CPU times: user 670 ms, sys: 420 ms, total: 1.09 s
Wall time: 960 ms


In [57]:
# First element returned by predict, flattened to 1-D and thresholded at 0.5
# to obtain boolean class predictions comparable with y_test.
pred_prob_means = m.predict(X_test)[0].reshape(-1)
pred = pred_prob_means > 0.5
score = np.equal(pred, y_test).mean()
print('Accuracy: %.3f' % score)

# The kernel hyperparameters are array-like GPy parameters. Formatting them
# with '%f' relies on implicit array-to-scalar conversion, which NumPy 1.25+
# deprecates, so extract plain Python scalars first.
kern_variance = m.kern.variance.item()
kern_lengthscale = m.kern.lengthscale.item()
print('Kernel Hyperparams:\n  variance    %10.1f\n  length_scale%10.1f\nLog Marginal Likelihood: %.1f' % (kern_variance, kern_lengthscale, m.log_likelihood()))

Accuracy: 0.845
Kernel Hyperparams:
  variance          19.0
  length_scale       6.4
Log Marginal Likelihood: -27.7


In [58]:
# Write the model to the file 'test'; the next cell reads its 'param_array'
# dataset back with h5py.
m.save('test')

In [59]:
import h5py

# Read the stored parameter vector back from the file written by m.save.
filename = 'test'
with h5py.File(filename, 'r') as h5_file:
    d = list(h5_file['param_array'])

# Build a second classifier on the same training data and overwrite all of
# its parameters with the stored values.
# NOTE(review): in the recorded outputs the reloaded model does not reproduce
# the original exactly (accuracy 0.840 vs 0.845, log marginal likelihood
# -31.0 vs -27.7) although the kernel hyperparameters match. Presumably the
# approximate-inference state is not part of 'param_array' -- confirm.
train_targets_col = y_train.reshape(-1,1)
m2 = GPy.models.GPClassification(X_train, train_targets_col)
m2[:] = d

In [60]:
# Same evaluation as for the original model, applied to the reloaded m2.
pred_prob_means = m2.predict(X_test)[0].reshape(-1)
pred = pred_prob_means > 0.5
score = np.equal(pred, y_test).mean()
print('Accuracy: %.3f' % score)

# The kernel hyperparameters are array-like GPy parameters. Formatting them
# with '%f' relies on implicit array-to-scalar conversion, which NumPy 1.25+
# deprecates, so extract plain Python scalars first.
kern_variance = m2.kern.variance.item()
kern_lengthscale = m2.kern.lengthscale.item()
print('Kernel Hyperparams:\n  variance    %10.1f\n  length_scale%10.1f\nLog Marginal Likelihood: %.1f' % (kern_variance, kern_lengthscale, m2.log_likelihood()))

Accuracy: 0.840
Kernel Hyperparams:
  variance          19.0
  length_scale       6.4
Log Marginal Likelihood: -31.0


### Increasing training set size

In [11]:
# Larger experiment: 1000 training rows, 10000 test rows, same random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=1000,
    test_size=10000,
    random_state=23,
)
n_train, n_test = len(y_train), len(y_test)
print('Train: %i, Test: %i' % (n_train, n_test))

Train: 1000, Test: 10000


In [12]:
%%time
m = GPy.models.GPClassification(X_train,y_train.reshape(-1,1))

CPU times: user 1min 17s, sys: 26.7 s, total: 1min 44s
Wall time: 52.5 s


In [13]:
%%time
for i in range(3):
    m.optimize('bfgs', max_iters=100)

CPU times: user 6min 13s, sys: 2min 10s, total: 8min 24s
Wall time: 4min 13s


In [14]:
# First element returned by predict, flattened to 1-D and thresholded at 0.5
# to obtain boolean class predictions comparable with y_test.
pred_prob_means = m.predict(X_test)[0].reshape(-1)
pred = pred_prob_means > 0.5
score = np.equal(pred, y_test).mean()
print('Accuracy: %.3f' % score)

# The kernel hyperparameters are array-like GPy parameters. Formatting them
# with '%f' relies on implicit array-to-scalar conversion, which NumPy 1.25+
# deprecates, so extract plain Python scalars first.
kern_variance = m.kern.variance.item()
kern_lengthscale = m.kern.lengthscale.item()
print('Kernel Hyperparams:\n  variance    %10.1f\n  length_scale%10.1f\nLog Marginal Likelihood: %.1f' % (kern_variance, kern_lengthscale, m.log_likelihood()))

Accuracy: 0.961
Kernel Hyperparams:
  variance        2516.6
  length_scale      33.7
Log Marginal Likelihood: -169.2


## Sparse Gaussian Process

In [15]:
# Split for the sparse model: 1000 training rows, 10000 test rows,
# random_state=23.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=1000,
    test_size=10000,
    random_state=23,
)
n_train, n_test = len(y_train), len(y_test)
print('Train: %i, Test: %i' % (n_train, n_test))

Train: 1000, Test: 10000


In [16]:
%%time
m = GPy.models.SparseGPClassification(X_train,y_train.reshape(-1,1), num_inducing=150)

CPU times: user 11.8 s, sys: 2.7 s, total: 14.5 s
Wall time: 7.29 s


In [17]:
%%time
for i in range(6):
    m.optimize('bfgs', max_iters=100)
    print(m)


Name : SparseGPClassification
Objective : 373.6552930688292
Number of Parameters : 5552
Number of Optimization Parameters : 5552
Updates : True
Parameters:
  [1mSparseGPClassification.[0;0m  |               value  |  constraints  |  priors
  [1minducing_inputs        [0;0m  |           (150, 37)  |               |        
  [1mrbf.variance           [0;0m  |   94.44726158807654  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |  29.308401818045912  |      +ve      |        

Name : SparseGPClassification
Objective : 195.13813730717948
Number of Parameters : 5552
Number of Optimization Parameters : 5552
Updates : True
Parameters:
  [1mSparseGPClassification.[0;0m  |              value  |  constraints  |  priors
  [1minducing_inputs        [0;0m  |          (150, 37)  |               |        
  [1mrbf.variance           [0;0m  |  811.4500821743981  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |  33.82446014888173  |      +ve      |        



In [18]:
# First element returned by predict, flattened to 1-D and thresholded at 0.5
# to obtain boolean class predictions comparable with y_test.
pred_prob_means = m.predict(X_test)[0].reshape(-1)
pred = pred_prob_means > 0.5
score = np.equal(pred, y_test).mean()
print('Accuracy: %.3f' % score)

# The kernel hyperparameters are array-like GPy parameters. Formatting them
# with '%f' relies on implicit array-to-scalar conversion, which NumPy 1.25+
# deprecates, so extract plain Python scalars first.
kern_variance = m.kern.variance.item()
kern_lengthscale = m.kern.lengthscale.item()
print('Kernel Hyperparams:\n  variance    %10.1f\n  length_scale%10.1f\nLog Marginal Likelihood: %.1f' % (kern_variance, kern_lengthscale, m.log_likelihood()))

Accuracy: 0.960
Kernel Hyperparams:
  variance         812.1
  length_scale      20.5
Log Marginal Likelihood: -169.9


The log marginal likelihoods of the full model (-169.2) and the sparse model (-169.9) are nearly equal (≈ -169). This suggests that a sparse model with 150 inducing variables explains the data about as well as the full model with 1000 training points!

## Sparse Gaussian Process (on 10'000)

In [19]:
# Largest experiment: 10000 training rows, 10000 test rows, random_state=23.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=10000,
    test_size=10000,
    random_state=23,
)
n_train, n_test = len(y_train), len(y_test)
print('Train: %i, Test: %i' % (n_train, n_test))

Train: 10000, Test: 10000


In [20]:
%%time
m = GPy.models.SparseGPClassification(X_train,y_train.reshape(-1,1), num_inducing=150)

CPU times: user 22min 36s, sys: 4min 24s, total: 27min
Wall time: 14min 9s


In [21]:
%%time
for i in range(4):
    m.optimize('bfgs', max_iters=100)
    print(m)


Name : SparseGPClassification
Objective : 2843.657954695911
Number of Parameters : 5552
Number of Optimization Parameters : 5552
Updates : True
Parameters:
  [1mSparseGPClassification.[0;0m  |              value  |  constraints  |  priors
  [1minducing_inputs        [0;0m  |          (150, 37)  |               |        
  [1mrbf.variance           [0;0m  |  401.8762119633459  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |  56.47594833397397  |      +ve      |        

Name : SparseGPClassification
Objective : 1059.4795180017463
Number of Parameters : 5552
Number of Optimization Parameters : 5552
Updates : True
Parameters:
  [1mSparseGPClassification.[0;0m  |               value  |  constraints  |  priors
  [1minducing_inputs        [0;0m  |           (150, 37)  |               |        
  [1mrbf.variance           [0;0m  |   403.9114604336496  |      +ve      |        
  [1mrbf.lengthscale        [0;0m  |  21.317380886748158  |      +ve      |        



In [29]:
# First element returned by predict, flattened to 1-D and thresholded at 0.5
# to obtain boolean class predictions comparable with y_test.
pred_prob_means = m.predict(X_test)[0].reshape(-1)
pred = pred_prob_means > 0.5
score = np.equal(pred, y_test).mean()
print('Accuracy: %.3f' % score)

# The kernel hyperparameters are array-like GPy parameters. Formatting them
# with '%f' relies on implicit array-to-scalar conversion, which NumPy 1.25+
# deprecates, so extract plain Python scalars first.
kern_variance = m.kern.variance.item()
kern_lengthscale = m.kern.lengthscale.item()
print('Kernel Hyperparams:\n  variance    %10.1f\n  length_scale%10.1f\nLog Marginal Likelihood: %.1f' % (kern_variance, kern_lengthscale, m.log_likelihood()))

Accuracy: 0.964
Kernel Hyperparams:
  variance         404.1
  length_scale      18.4
Log Marginal Likelihood: -1041.5
