In [16]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [17]:
def filter_coordinates(data, coordinate_limits):
    """
    Returns the filtered coordinates from the dataset.
    
    Params:
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)
    
    Returns:
        - numpy matrix that consists only of the filtered data
    """
    min_lat, min_lng, max_lat, max_lng = coordinate_limits
    
    f1 = data[data[:, 0] > min_lat]
    f2 = f1[f1[:, 0] < max_lat]
    f3 = f2[f2[:, 1] > min_lng]
    f4 = f3[f3[:, 1] < max_lng]
    
    return f4

# Dataset

In [18]:
MAX_AREA = (1.35081, 103.955765, 1.355955, 103.962786)

In [19]:
sengkang = np.load('../data/sk-data.npy')
tampines = np.load('../data/tp-data.npy')

In [20]:
tampines = filter_coordinates(tampines, MAX_AREA)

In [21]:
tampines_X = tampines[:,0:3]
tampines_y = tampines[:,3]

In [22]:
print(tampines_X.shape)
print(tampines_y.shape)

(64, 3)
(64,)


In [23]:
tampines_y.mean()

1.0

# Kernels

We are going to experiemnt with combinations of linear and periodic kernels in this notebook.

## Linear Kernel

In [24]:
linear = GPy.kern.Linear(3)

In [25]:
display(linear)

linear.,value,constraints,priors
variances,1.0,+ve,


## Periodic Kernel

In [26]:
periodic = GPy.kern.StdPeriodic(3)

In [27]:
display(periodic)

std_periodic.,value,constraints,priors
variance,1.0,+ve,
period,1.0,+ve,
lengthscale,1.0,+ve,


## Combinations of Kernels

In [28]:
k1 = linear + periodic
k2 = linear * periodic

# Model

We define a Gaussian Process Regressor with our defined kernel.

## Addition of Kernels

In [29]:
m1 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [30]:
display(m1)

GP_regression.,value,constraints,priors
sum.linear.variances,1.0,+ve,
sum.std_periodic.variance,1.0,+ve,
sum.std_periodic.period,1.0,+ve,
sum.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [31]:
m1.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    00s01  0002   9.554202e+02   1.562628e-01 
    00s12  0013  -4.132577e+02   1.164804e+10 
    00s29  0037  -4.134600e+02   3.741082e+07 
Runtime:     00s29
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x10a8942e8>

In [32]:
m1.log_likelihood()

413.45999011556819

## Multiplication of Kernels

In [33]:
m2 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [34]:
display(m2)

GP_regression.,value,constraints,priors
sum.linear.variances,5.56268464627e-309,+ve,
sum.std_periodic.variance,0.99527866302,+ve,
sum.std_periodic.period,0.999592831592,+ve,
sum.std_periodic.lengthscale,1.00017575001,+ve,
Gaussian_noise.variance,1.0,+ve,


In [35]:
m2.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    00s02  0004   6.367038e+01   4.371007e+05 
    00s10  0013   6.364405e+01   6.838188e+06 
    00s18  0020   6.365346e+01   2.145154e+07 
    00s58  0088  -5.141803e+02   1.046824e+14 
Runtime:     00s58
Optimization status: Converged





<paramz.optimization.optimization.opt_lbfgsb at 0x10a88f160>

In [36]:
m2.log_likelihood()

514.18027272331392