In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [2]:
def filter_coordinates(data, coordinate_limits):
    """
    Returns the filtered coordinates from the dataset.
    
    Params:
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)
    
    Returns:
        - numpy matrix that consists only of the filtered data
    """ 
    min_lat, min_lng, max_lat, max_lng = coordinate_limits
    
    f1 = data[data[:, 0] > min_lat]
    f2 = f1[f1[:, 0] < max_lat]
    f3 = f2[f2[:, 1] > min_lng]
    f4 = f3[f3[:, 1] < max_lng]
    
    return f4

In [3]:
def filter_by_index(data, coordinate_indexes):
    """
    Returns the coordinates that bounds the index of the location matrix.
    
    Params:
        - data (numpy matrix): dataset
        - coordinate_indexes (tuple): in the format: (ROW_MIN, COL_MIN, ROW_MAX, COL_MAX)
        
    Returns:
        - tuple of the following format: (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)
    """
    row_min, col_min, row_max, col_max = coordinate_indexes

    f1 = data[data[:, 4] > row_min]
    f2 = f1[f1[:, 4] < row_max]
    f3 = f2[f2[:, 5] > col_min]
    f4 = f3[f3[:, 5] < col_max]

    return f4

# Dataset

In [4]:
MAX_AREA = (24, 43, 35, 51)
tampines = filter_by_index(np.load('../filtered_data.npy'), MAX_AREA)

In [5]:
tampines_X = tampines[:,0:3]
tampines_y = tampines[:,3]

In [6]:
print(tampines_X.shape)
print(tampines_y.shape)

(2520, 3)
(2520,)


# Kernels

We are going to experiemnt with combinations of linear and periodic kernels in this notebook.

## Linear Kernel

In [7]:
linear = GPy.kern.Linear(3)

In [8]:
display(linear)

linear.,value,constraints,priors
variances,1.0,+ve,


## Periodic Kernel

In [9]:
periodic = GPy.kern.StdPeriodic(3)

In [10]:
display(periodic)

std_periodic.,value,constraints,priors
variance,1.0,+ve,
period,1.0,+ve,
lengthscale,1.0,+ve,


## Combinations of Kernels

In [11]:
k1 = linear + periodic
k2 = linear * periodic

# Model

We define a Gaussian Process Regressor with our defined kernel.

## Addition of Kernels

In [12]:
m1 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [13]:
display(m1)

GP_regression.,value,constraints,priors
sum.linear.variances,1.0,+ve,
sum.std_periodic.variance,1.0,+ve,
sum.std_periodic.period,1.0,+ve,
sum.std_periodic.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [14]:
m1.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    15s39  0003   3.255074e+04   2.471414e-01 
    55s43  0011   3.016696e+03   1.354327e+12 
 02m37s57  0032   3.016675e+03   1.388048e+05 
Runtime:  02m37s57
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x1095d1c50>

In [15]:
m1.log_likelihood()

-3016.6747968241307

## Multiplication of Kernels

In [16]:
m2 = GPy.models.GPRegression(tampines_X, tampines_y[:,None], k1)

In [17]:
display(m2)

GP_regression.,value,constraints,priors
sum.linear.variances,2.58747165968e-70,+ve,
sum.std_periodic.variance,1.00001131384,+ve,
sum.std_periodic.period,1.01968572895,+ve,
sum.std_periodic.lengthscale,0.999957409447,+ve,
Gaussian_noise.variance,1.0,+ve,


In [18]:
m2.optimize(messages=True)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    17s03  0003   3.032444e+03   1.359169e+10 
    51s12  0010   3.023634e+03   1.321252e+05 
Runtime:     51s12
Optimization status: Converged



<paramz.optimization.optimization.opt_lbfgsb at 0x1095d19b0>

In [19]:
m2.log_likelihood()

-3023.63358413169