In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
# Tell GPy to use matplotlib as its plotting library.
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [3]:
def filter_coordinates(data, coordinate_limits):
    """
    Keeps only the rows of the dataset that lie strictly inside a bounding box.

    Column 0 of `data` is compared against the latitude limits and column 1
    against the longitude limits. Both bounds are exclusive, so rows sitting
    exactly on a limit are dropped.

    Params:
        - data (numpy matrix): dataset
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)

    Returns:
        - numpy matrix that consists only of the filtered data
    """
    min_lat, min_lng, max_lat, max_lng = coordinate_limits

    latitudes = data[:, 0]
    longitudes = data[:, 1]

    # A row survives only if every one of the four strict bounds holds.
    inside_box = (
        (latitudes > min_lat)
        & (latitudes < max_lat)
        & (longitudes > min_lng)
        & (longitudes < max_lng)
    )

    return data[inside_box]

In [4]:
def filter_by_index(data, coordinate_indexes):
    """
    Returns the rows of the dataset whose location-matrix indexes fall
    strictly inside the given index bounds.

    Column 4 of `data` is compared against the row bounds and column 5 against
    the column bounds. Both bounds are exclusive, so rows whose index equals a
    bound are dropped.

    Params:
        - data (numpy matrix): dataset
        - coordinate_indexes (tuple): in the format: (ROW_MIN, COL_MIN, ROW_MAX, COL_MAX)

    Returns:
        - numpy matrix that consists only of the filtered rows of `data`
          (all columns are kept)
    """
    row_min, col_min, row_max, col_max = coordinate_indexes

    row_index = data[:, 4]
    col_index = data[:, 5]

    # Build one combined mask instead of materialising an intermediate copy of
    # the dataset after every individual comparison.
    inside_bounds = (
        (row_index > row_min)
        & (row_index < row_max)
        & (col_index > col_min)
        & (col_index < col_max)
    )

    return data[inside_bounds]

# Dataset

In [5]:
# Index bounds handed to filter_by_index, in its documented order:
# (ROW_MIN, COL_MIN, ROW_MAX, COL_MAX).
MAX_AREA = 24, 43, 35, 51
tampines = filter_by_index(
    data=np.load('../filtered_data.npy'),
    coordinate_indexes=MAX_AREA,
)

In [6]:
# The first three columns are the model inputs; the fourth column is the target.
tampines_X, tampines_y = tampines[:, :3], tampines[:, 3]

In [28]:
# Sanity check: inputs should be (n, 3) and targets (n,).
print(tampines_X.shape, tampines_y.shape, sep='\n')

(2520, 3)
(2520,)


# Kernels

We are going to experiment with the `Matern32` kernel, combined with a standard periodic kernel (`StdPeriodic`), in this notebook.

## Matern32 Kernel

In [9]:
# Matern 3/2 kernel over the three input columns of tampines_X.
matern32 = GPy.kern.Matern32(input_dim=3)

In [10]:
# Show the kernel's parameter table (values, constraints, priors).
display(matern32)

Mat32.,value,constraints,priors
variance,1.0,+ve,
lengthscale,1.0,+ve,


## Periodic Kernel

In [36]:
# Standard periodic kernel over the same three input columns.
periodic = GPy.kern.StdPeriodic(input_dim=3)

In [37]:
# Show the kernel's parameter table (values, constraints, priors).
display(periodic)

std_periodic.,value,constraints,priors
variance,1.0,+ve,
period,1.0,+ve,
lengthscale,1.0,+ve,


# Model

We define a Gaussian Process regressor whose kernel is the product of the `Matern32` and periodic kernels defined above.

In [42]:
# GP regression on the product kernel; the 1-D target vector is given a
# trailing axis so it is passed as an (n, 1) column.
m1 = GPy.models.GPRegression(
    X=tampines_X,
    Y=tampines_y[:, np.newaxis],
    kernel=matern32 * periodic,
)

In [None]:
# Show the model's parameter table before optimisation.
display(m1)

GP_regression.,value,constraints,priors
mul.Mat32.variance,0.11228357504,+ve,
mul.Mat32.lengthscale,1.0,+ve,
mul.std_periodic.variance,1.0,+ve,
mul.std_periodic.period,1.0,+ve,
mul.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [None]:
# Optimise the model's parameters; messages=True prints the optimiser's progress.
# NOTE: slow cell - the recorded run below took roughly 7.5 minutes.
m1.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    47s67  0007   2.903332e+03   3.392383e-03 
 02m36s12  0023   2.881880e+03   4.463562e+03 
 07m11s90  0062   2.845280e+03   1.479072e+03 



 07m19s75  0063   3.221759e+03           nan 



 07m34s27  0065   2.845280e+03   1.479072e+03 
Runtime:  07m34s27
Optimization status: Converged



In [None]:
# Log likelihood of the model at its current parameter values.
m1.log_likelihood()