In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import GPy

from IPython.display import display
# Select matplotlib as the plotting backend used by GPy's plot helpers.
GPy.plotting.change_plotting_library('matplotlib')

# Helpers

In [3]:
def filter_by_coordinates(data, coordinate_limits):
    """
    Keep only the rows of `data` that lie strictly inside a bounding box.

    Column 0 of `data` is compared against the latitude limits and column 1
    against the longitude limits. Every comparison is exclusive, so a row that
    sits exactly on a limit is dropped.

    Params:
        - data (numpy array): 2D dataset, one row per sample
        - coordinate_limits (tuple): in the format (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG)

    Returns:
        - numpy array that consists only of the rows inside the limits
    """
    min_lat, min_lng, max_lat, max_lng = coordinate_limits

    lat_column, lng_column = data[:, 0], data[:, 1]

    # Build one combined mask instead of filtering the array four times.
    inside_box = (
        (lat_column > min_lat)
        & (lat_column < max_lat)
        & (lng_column > min_lng)
        & (lng_column < max_lng)
    )

    return data[inside_box]

In [4]:
def filter_by_index(data, coordinate_indexes):
    """
    Keep only the rows of `data` whose indexes fall inside an index window.

    Column 4 of `data` is compared against the row bounds and column 5 against
    the column bounds. The window is half-open: minimums are inclusive and
    maximums are exclusive.

    Params:
        - data (numpy array): dataset
        - coordinate_indexes (tuple): in the format: (ROW_MIN, COL_MIN, ROW_MAX, COL_MAX)

    Returns:
        - numpy array that consists only of the rows inside the index window
    """
    row_min, col_min, row_max, col_max = coordinate_indexes

    row_index, col_index = data[:, 4], data[:, 5]

    # Build one combined mask instead of filtering the array four times.
    inside_window = (
        (row_index >= row_min)
        & (row_index < row_max)
        & (col_index >= col_min)
        & (col_index < col_max)
    )

    return data[inside_window]

In [5]:
def ith_coords(dataset, i):
    """
    Return the first two entries of the i-th row of `dataset` as a tuple.

    Presumably these are the (LAT, LNG) of that sample -- confirm against the
    dataset's column layout.
    """
    row = dataset[i]
    return row[0], row[1]

In [6]:
def plot_slices_per_location(model, *coords):
    """
    Plot the number of bikes at coordinates over time given a GP.

    One figure is requested per coordinate. Input dimensions 0 and 1 of the
    model are fixed to that coordinate, leaving the remaining input free.
    Calling this with no coordinates is a no-op.

    Params:
        - model (GPy model): model to sample from
        - *coords (tuples): any number of tuples, each in the format (LAT, LNG)
    """
    # Without this guard `canvas` below would never be assigned and the final
    # show() call would fail.
    if not coords:
        return

    for idx, (lat, lng) in enumerate(coords):
        # NOTE(review): confirm that the first positional argument of figure()
        # is interpreted as intended by the active plotting backend.
        figure = GPy.plotting.plotting_library().figure(idx+1)
        canvas = model.plot(figure=figure, fixed_inputs=[(0, lat), (1, lng)])

    # Only the canvas from the last iteration is handed to show().
    GPy.plotting.show(canvas)

In [7]:
def plot_slices_per_timeslice(model, *timeslices):
    """
    Plot a density graph of bikes at coordinates for a given time.

    One figure is requested per timeslice. Input dimension 2 of the model is
    fixed to that timestamp, leaving the other inputs free.
    Calling this with no timeslices is a no-op.

    Params:
        - model (GPy model): model to sample from
        - *timeslices (ints): any number of timeslices in UNIX timestamp
    """
    # Without this guard `canvas` below would never be assigned and the final
    # show() call would fail.
    if not timeslices:
        return

    for idx, timestamp in enumerate(timeslices):
        # NOTE(review): confirm that the first positional argument of figure()
        # is interpreted as intended by the active plotting backend.
        figure = GPy.plotting.plotting_library().figure(idx+1)
        canvas = model.plot(figure=figure, fixed_inputs=[(2, timestamp)])

    # Only the canvas from the last iteration is handed to show().
    GPy.plotting.show(canvas)

In [8]:
def load_data(filename, top_left=None, bottom_right=None):
    """
    Load a `.npy` dataset and split it into GP inputs and targets.

    The bounding box is taken from the arguments, not from notebook-level
    globals, so the function does not depend on cells that run later.

    Params:
        - filename (str): path of the `.npy` file to load
        - top_left (tuple): (LAT, LNG) of the top left corner of the area to keep
        - bottom_right (tuple): (LAT, LNG) of the bottom right corner of the area to keep

    When both corners are omitted, no spatial filtering is applied.

    Returns:
        - X: columns 0 to 2 of the (filtered) data
        - y: column 3 of the (filtered) data

    Raises:
        - ValueError: if exactly one of `top_left` / `bottom_right` is given
    """
    # A bounding box needs both corners; refuse a half-specified one.
    if (top_left is None) != (bottom_right is None):
        raise ValueError('top_left and bottom_right must be provided together')

    data = np.load(filename)

    if top_left is not None:
        # Reorder the corners into (MIN_LAT, MIN_LNG, MAX_LAT, MAX_LNG),
        # the format filter_by_coordinates expects.
        max_area = (bottom_right[0], top_left[1], top_left[0], bottom_right[1])
        data = filter_by_coordinates(data, max_area)

    X, y = data[:, 0:3], data[:, 3]

    return X, y

In [16]:
def run_gp(dataset, kernel, top_left=None, bottom_right=None):
    """
    Load a dataset, fit a GP regression model to it and return the fitted model.

    The returned model can be trained further (e.g. with restarts) after this
    initial optimisation.

    Params:
        - dataset (str): name of dataset in `.npy` format to train the GP
        - kernel: GPy kernel object that defines the covariance function of our GP
        - top_left (tuple): (LAT, LNG) of the top left corner of our area concerned
        - bottom_right (tuple): (LAT, LNG) of the bottom right corner of our area concerned

    Returns:
        - a trained GPy model
    """
    inputs, targets = load_data(dataset, top_left=top_left, bottom_right=bottom_right)

    print('Training GP...')
    print(f'X shape: {inputs.shape}')
    print(f'y shape: {targets.shape}')

    # The targets are passed with an extra trailing axis (one column).
    target_column = targets[:, None]

    gp_model = GPy.models.GPRegression(inputs, target_column, kernel)
    gp_model.optimize(messages=True)

    # Show the fitted model's summary in the notebook output.
    display(gp_model)

    return gp_model

# Kernels

We are going to experiment with the `Matern32` and standard periodic (`StdPeriodic`) kernels in this notebook, and fit the Gaussian Process with their sum.

## Matern32 Kernel

In [11]:
# Matern 3/2 kernel over the three input dimensions of the model.
matern32 = GPy.kern.Matern32(input_dim=3)

In [12]:
# Show the kernel's parameter table (value, constraints, priors).
display(matern32)

Mat32.,value,constraints,priors
variance,1.0,+ve,
lengthscale,1.0,+ve,


## Periodic Kernel

In [13]:
# Standard periodic kernel over the three input dimensions of the model.
periodic = GPy.kern.StdPeriodic(input_dim=3)

In [14]:
# Show the kernel's parameter table (value, constraints, priors).
display(periodic)

std_periodic.,value,constraints,priors
variance,1.0,+ve,
period,1.0,+ve,
lengthscale,1.0,+ve,


# Run Gaussian Process

In [None]:
# Corners of the area of interest, each as (LAT, LNG).
TOP_LEFT = (1.395520, 103.889045)
BOTTOM_RIGHT = (1.390112, 103.896281)

# Train on the sum of the periodic and Matern32 kernels, restricted to the area above.
model = run_gp(
    '../data/filtered_data.npy',
    periodic + matern32,
    top_left=TOP_LEFT,
    bottom_right=BOTTOM_RIGHT,
)

Training GP...
X shape: (7344, 3)
y shape: (7344,)
