In [29]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [30]:
def filter_coordinates(data, coordinate_limits):
    """
    Returns the filtered coordinates from the dataset.
    
    Params:
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)
    
    Returns:
        - numpy matrix that consists only of the filtered data
    """
    min_lat, min_lng, max_lat, max_lng = coordinate_limits
    
    f1 = data[data[:, 0] > min_lat]
    f2 = f1[f1[:, 0] < max_lat]
    f3 = f2[f2[:, 1] > min_lng]
    f4 = f3[f3[:, 1] < max_lng]
    
    return f4

# Dataset

In [94]:
MAX_AREA = (1.382030, 103.888635, 1.402076, 103.909292)
# MAX_AREA = (1.35081, 103.955765, 1.355955, 103.962786)

In [95]:
tampines = np.load('../data/filtered_data-200.npy')

In [96]:
tampines = filter_coordinates(tampines, MAX_AREA)

In [97]:
tampines_X = tampines[:,0:3]
tampines_y = tampines[:,3]

In [98]:
print(tampines_X.shape)
print(tampines_y.shape)

(2304, 3)
(2304,)


In [48]:
tampines_y.mean()

0.88538629737609331

# Kernels

We are going to experiemnt with combinations of linear and periodic kernels in this notebook.

## RBF Kernel

In [49]:
rbf_ll = GPy.kern.RBF(input_dim=2, active_dims=[0,1])
rbf_lat = GPy.kern.RBF(input_dim=1, active_dims=[0])
rbf_lng = GPy.kern.RBF(input_dim=1, active_dims=[1])
periodic = GPy.kern.StdPeriodic(input_dim=1, active_dims=[2])

In [50]:
k1 = rbf_lat * rbf_lng + periodic
k2 = rbf_lat * rbf_lng * periodic
k3 = rbf_lat + rbf_lng + periodic

# Model

We define a Gaussian Process Regressor with our defined kernel.

In [62]:
m3 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k3)
display(m3)
m3.optimize(messages=True)
m3.log_likelihood()

GP_regression.,value,constraints,priors
sum.rbf.variance,1.0,+ve,
sum.rbf.lengthscale,1.0,+ve,
sum.rbf_1.variance,1.0,+ve,
sum.rbf_1.lengthscale,1.0,+ve,
sum.std_periodic.variance,1.0,+ve,
sum.std_periodic.period,1.0,+ve,
sum.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    01s40  0003   2.426863e+03   6.805406e+12 
    09s94  0021   1.452180e+07   1.733239e+26 
    14s75  0032   2.024191e+02   1.081065e+01 
Runtime:     14s75
Optimization status: Converged



-202.41910376058075

## Addition of Kernels

In [51]:
m1 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [52]:
display(m1)

GP_regression.,value,constraints,priors
sum.mul.rbf.variance,1.0,+ve,
sum.mul.rbf.lengthscale,1.0,+ve,
sum.mul.rbf_1.variance,1.0,+ve,
sum.mul.rbf_1.lengthscale,1.0,+ve,
sum.std_periodic.variance,1.0,+ve,
sum.std_periodic.period,1.0,+ve,
sum.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [53]:
m1.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
 03m24s13  0006   1.036872e+04   1.483261e+09 
 08m55s63  0016   1.026263e+04   2.165099e+02 
Runtime:  08m55s63
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x10cd1bc18>

In [54]:
m1.log_likelihood()

-10262.626948220553

In [67]:
m1.predict(tampines_X[:1])

(array([[ 0.08256589]]), array([[ 0.08801507]]))

## Multiplication of Kernels

In [68]:
m2 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [69]:
display(m2)

GP_regression.,value,constraints,priors
sum.mul.rbf.variance,0.998956524479,+ve,
sum.mul.rbf.lengthscale,0.999997413489,+ve,
sum.mul.rbf_1.variance,0.998956524479,+ve,
sum.mul.rbf_1.lengthscale,0.999999898544,+ve,
sum.std_periodic.variance,0.998955091444,+ve,
sum.std_periodic.period,1.00113248784,+ve,
sum.std_periodic.lengthscale,1.00000030242,+ve,
Gaussian_noise.variance,1.0,+ve,


In [70]:
m2.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    03s99  0009   9.439626e+02   2.826182e+09 
    08s27  0019   9.429101e+02   2.104308e+05 
Runtime:     08s27
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x11279aa20>

In [71]:
m2.log_likelihood()

-942.91008028994293

# Prediction

Let's try to do some prediction with our trained model.

In [23]:
m2.predict(tampines_X[:1])

(array([[ 0.2883092]]), array([[ 1.01412765]]))