In [15]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [16]:
def filter_coordinates(data, coordinate_limits):
    """
    Returns the filtered coordinates from the dataset.
    
    Params:
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)
    
    Returns:
        - numpy matrix that consists only of the filtered data
    """
    min_lat, min_lng, max_lat, max_lng = coordinate_limits
    
    f1 = data[data[:, 0] > min_lat]
    f2 = f1[f1[:, 0] < max_lat]
    f3 = f2[f2[:, 1] > min_lng]
    f4 = f3[f3[:, 1] < max_lng]
    
    return f4

# Dataset

In [17]:
MAX_AREA = (1.35081, 103.955765, 1.355955, 103.962786)

In [18]:
tampines = np.load('../filtered_data.npy')

In [19]:
tampines = filter_coordinates(tampines, MAX_AREA)

In [20]:
tampines_X = tampines[:,0:3]
tampines_y = tampines[:,3]

In [21]:
print(tampines_X.shape)
print(tampines_y.shape)

(1584, 3)
(1584,)


In [22]:
tampines_y.mean()

0.011363636363636364

# Kernels

We are going to experiemnt with combinations of linear and periodic kernels in this notebook.

## RBF Kernel

In [23]:
rbf = GPy.kern.RBF(3)

In [24]:
display(rbf)

rbf.,value,constraints,priors
variance,1.0,+ve,
lengthscale,1.0,+ve,


## Periodic Kernel

In [25]:
periodic = GPy.kern.StdPeriodic(3)

In [26]:
display(periodic)

std_periodic.,value,constraints,priors
variance,1.0,+ve,
period,1.0,+ve,
lengthscale,1.0,+ve,


## Combinations of Kernels

In [27]:
k1 = rbf + periodic
k2 = rbf * periodic

# Model

We define a Gaussian Process Regressor with our defined kernel.

## Addition of Kernels

In [28]:
m1 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [29]:
display(m1)

GP_regression.,value,constraints,priors
sum.rbf.variance,1.0,+ve,
sum.rbf.lengthscale,1.0,+ve,
sum.std_periodic.variance,1.0,+ve,
sum.std_periodic.period,1.0,+ve,
sum.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [30]:
m1.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    01s58  0001   1.534326e+03   2.345648e+05 
    15s39  0009  -1.206505e+03   3.564582e+09 
    25s04  0015  -1.206572e+03   3.127318e+02 
 01m01s60  0031  -1.253787e+03   3.089837e+07 
 01m03s12  0032   7.389629e+02   1.352324e+14 
 01m27s18  0048  -1.325311e+03   1.504979e+11 
 01m46s45  0059  -1.335540e+03   4.780037e+08 
 01m52s47  0063  -1.335541e+03   6.834537e+05 
Runtime:  01m52s47
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x10bab0e80>

In [31]:
m1.log_likelihood()

1335.5412124766672

In [41]:
m1.predict(tampines_X[:1])

(array([[ 0.15639185]]), array([[-0.10304917]]))

## Multiplication of Kernels

In [32]:
m2 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [33]:
display(m2)

GP_regression.,value,constraints,priors
sum.rbf.variance,2.69715692994e-06,+ve,
sum.rbf.lengthscale,1.0,+ve,
sum.std_periodic.variance,0.819223151152,+ve,
sum.std_periodic.period,0.999168374284,+ve,
sum.std_periodic.lengthscale,1.24386092007,+ve,
Gaussian_noise.variance,1.0,+ve,


In [34]:
m2.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    03s17  0002   1.473958e+03   4.824815e+08 
    15s05  0010   1.474067e+03   1.901192e+08 
    37s27  0025   1.468091e+03   5.007627e+06 
Runtime:     37s27
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x10e14b518>

In [35]:
m2.log_likelihood()

-1468.0910975477736

# Prediction

Let's try to do some prediction with our trained model.

In [40]:
m2.predict(tampines_X[:1])

(array([[ 0.01162504]]), array([[ 1.00066177]]))