In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [2]:
def filter_coordinates(data, coordinate_limits):
    """
    Returns the filtered coordinates from the dataset.
    
    Params:
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)
    
    Returns:
        - numpy matrix that consists only of the filtered data
    """
    min_lat, min_lng, max_lat, max_lng = coordinate_limits
    
    f1 = data[data[:, 0] > min_lat]
    f2 = f1[f1[:, 0] < max_lat]
    f3 = f2[f2[:, 1] > min_lng]
    f4 = f3[f3[:, 1] < max_lng]
    
    return f4

# Dataset

In [3]:
MAX_AREA = (1.35081, 103.955765, 1.355955, 103.962786)

In [4]:
tampines = np.load('../filtered_data.npy')

In [5]:
tampines = filter_coordinates(tampines, MAX_AREA)

In [6]:
tampines_X = tampines[:,0:3]
tampines_y = tampines[:,3]

In [7]:
print(tampines_X.shape)
print(tampines_y.shape)

(18, 3)
(18,)


In [8]:
tampines_y.mean()

1.0

# Kernels

We are going to experiemnt with combinations of linear and periodic kernels in this notebook.

## RBF Kernel

In [9]:
rbf = GPy.kern.RBF(3)

In [10]:
display(rbf)

rbf.,value,constraints,priors
variance,1.0,+ve,
lengthscale,1.0,+ve,


## Periodic Kernel

In [11]:
periodic = GPy.kern.StdPeriodic(3)

In [12]:
display(periodic)

std_periodic.,value,constraints,priors
variance,1.0,+ve,
period,1.0,+ve,
lengthscale,1.0,+ve,


## Combinations of Kernels

In [13]:
k1 = rbf + periodic
k2 = rbf * periodic

# Model

We define a Gaussian Process Regressor with our defined kernel.

## Addition of Kernels

In [14]:
m1 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [15]:
display(m1)

GP_regression.,value,constraints,priors
sum.rbf.variance,1.0,+ve,
sum.rbf.lengthscale,1.0,+ve,
sum.std_periodic.variance,1.0,+ve,
sum.std_periodic.period,1.0,+ve,
sum.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [16]:
m1.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    00s07  0009  -5.796948e+01   2.924815e+09 
    00s19  0023  -9.687611e+01   4.224047e+11 
    00s27  0043  -1.310620e+02   1.105353e+16 
Runtime:     00s27
Optimization status: Converged





<paramz.optimization.optimization.opt_lbfgsb at 0x109abeda0>

In [17]:
m1.log_likelihood()

131.06195863358968

## Multiplication of Kernels

In [18]:
m2 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [19]:
display(m2)

GP_regression.,value,constraints,priors
sum.rbf.variance,4.2367026592699994e-14,+ve,
sum.rbf.lengthscale,1.0,+ve,
sum.std_periodic.variance,0.405133578085,+ve,
sum.std_periodic.period,1.00013890834,+ve,
sum.std_periodic.lengthscale,1.00082911356,+ve,
Gaussian_noise.variance,1.0,+ve,


In [20]:
m2.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    00s03  0004   1.599748e+01   1.421591e+09 
    00s13  0015   1.398995e+01   6.926614e+06 
    00s19  0022   1.281772e+01   4.718751e+04 
    00s41  0077  -1.292445e+02   2.179583e+16 
Runtime:     00s41
Optimization status: Converged





<paramz.optimization.optimization.opt_lbfgsb at 0x109ae4a90>

In [21]:
m2.log_likelihood()

129.24446325467414

# Prediction

Let's try to do some prediction with our trained model.

In [22]:
m1.predict(tampines_X[:1])

(array([[ 3.23147897]]), array([[-2.92142351]]))