## Gaussian Process Regression with GPy

In [13]:
import pandas as pd

### 1. Read the data

In [14]:
# Position table from the "full" CSV; the displayed frame has x, y and z columns.
all_positions = pd.read_csv(
    'data/positions_xyz_128px_full.csv',
    sep=',',
)
all_positions

Unnamed: 0,x,y,z
0,10.138330,3.169981,15.099980
1,10.133340,3.269852,15.199850
2,10.125010,3.369500,15.299500
3,10.113370,3.468816,15.398820
4,10.098430,3.567688,15.497690
...,...,...,...
25352,5.115520,4.872968,9.123255
25353,4.845301,5.128964,8.955405
25354,4.577969,5.373679,8.790463
25355,4.314191,5.604666,8.628434


In [15]:
# Red-ball coordinate table from the "full" CSV: paired columns u0,v0 ... u6,v6.
# The displayed frame contains NaN gaps in some pairs; they are mean-imputed
# in the "Handle Missing Values" step.
all_red_ball_positions = pd.read_csv(
    'data/red_ball_coordinates_128px_full.csv',
    sep=',',
)
all_red_ball_positions

Unnamed: 0,u0,v0,u1,v1,u2,v2,u3,v3,u4,v4,u5,v5,u6,v6
0,,,45.038462,83.307692,86.925926,39.333333,0.333333,41.500000,,,88.136364,82.727273,68.5,118.5
1,,,43.350000,83.850000,87.000000,40.111111,0.444444,42.555556,,,89.500000,84.500000,68.5,118.5
2,,,43.791667,84.666667,87.310345,41.034483,0.444444,43.444444,,,89.473684,85.631579,68.5,118.5
3,,,43.411765,85.705882,87.400000,41.400000,0.666667,44.500000,,,89.777778,86.500000,68.5,118.5
4,,,44.000000,86.285714,87.740741,42.000000,1.062500,45.437500,,,89.944444,87.222222,68.5,118.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25352,68.500000,118.166667,60.500000,69.000000,,,86.988235,82.435294,55.0,24.0,,,,
25353,68.500000,118.166667,60.500000,69.000000,,,92.730337,87.629213,55.0,24.0,,,,
25354,68.571429,118.857143,60.500000,69.000000,,,110.329545,101.409091,55.0,24.0,,,,
25355,68.571429,118.857143,60.500000,69.000000,,,104.370787,97.269663,55.0,24.0,,,,


In [16]:
# Load the "016" subset. The first four position rows are discarded, after
# which both tables report the same number of rows (see the shapes displayed).
# NOTE(review): presumably the 4-row offset aligns positions with the image
# coordinates -- confirm where the offset comes from.
xyz_positions = pd.read_csv('data/positions_xyz_128px_016.csv', sep=',').iloc[4:]
red_ball_positions = pd.read_csv('data/red_ball_coordinates_128px_016.csv', sep=',')
red_ball_positions.shape, xyz_positions.shape

((2132, 14), (2132, 3))

### 2. Handle Missing Values

In [17]:
from sklearn.impute import SimpleImputer

# Mean-impute all four tables (features and targets alike). Every
# fit_transform call refits `imputer`, so once this cell finishes it holds the
# column means of `xyz_positions`, the last table processed.
# NOTE(review): the means for X_all / y_all are computed before the train/test
# split in the next section, so held-out rows contribute to them -- confirm
# this is acceptable.
imputer = SimpleImputer(strategy='mean')
X_all, y_all = (
    imputer.fit_transform(table)
    for table in (all_red_ball_positions, all_positions)
)
X, y = (
    imputer.fit_transform(table)
    for table in (red_ball_positions, xyz_positions)
)

### 3. Prepare the Data

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the imputed full dataset; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Shapes in order: train features, test features, train targets, test targets.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((20285, 14), (5072, 14), (20285, 3), (5072, 3))

In [20]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and targets, each fitted on the training split
# only; scaler_y is reused later to map predictions back to original units.
scaler_x, scaler_y = StandardScaler(), StandardScaler()
X_scaled_train, y_scaled_train = (
    scaler_x.fit_transform(X_train),
    scaler_y.fit_transform(y_train),
)
X_scaled_train.shape, y_scaled_train.shape

((20285, 14), (20285, 3))

### 4. Train the Model

In [21]:
from GPy.kern import RBF
from GPy.models import GPRegression

#### 4.1 Train GP Regression Model with GPy
Exact (full) GP regression, suitable for standard regression tasks.

In [22]:
# RBF kernel over the 14 standardised input columns, used by an exact GP
# regressor trained on the scaled training split.
kernel = RBF(input_dim=14)
model = GPRegression(
    X=X_scaled_train,
    Y=y_scaled_train,
    kernel=kernel,
)
# optimize() is the last expression, so its return value is what the cell displays.
model.optimize(messages=True)

HBox(children=(VBox(children=(IntProgress(value=0, max=1000), HTML(value=''))), Box(children=(HTML(value=''),)…

<paramz.optimization.optimization.opt_lbfgsb at 0x1d9056515e0>

In [23]:
# Display the optimised model's parameter table (RBF variance, RBF lengthscale,
# Gaussian noise variance).
model

GP_regression.,value,constraints,priors
rbf.variance,0.6431966671118324,+ve,
rbf.lengthscale,1.2396500515619675,+ve,
Gaussian_noise.variance,0.1823203326898224,+ve,


#### 4.2 Train Sparse GP Regression Model with GPy
Sparse GP approximation based on inducing inputs, intended for large datasets.

In [26]:
from GPy.models import SparseGPRegression

# A fresh RBF kernel for the sparse model; note that this rebinds `kernel`.
kernel = RBF(input_dim=14)
model_sparse = SparseGPRegression(
    X_scaled_train,
    y_scaled_train,
    kernel=kernel,
    # GPy's default, spelled out: it matches the (10, 14) inducing inputs
    # reported in the model summary.
    num_inducing=10,
)
# optimize() is the last expression, so its return value is what the cell displays.
model_sparse.optimize(messages=True)

HBox(children=(VBox(children=(IntProgress(value=0, max=1000), HTML(value=''))), Box(children=(HTML(value=''),)…

<paramz.optimization.optimization.opt_lbfgsb at 0x1d905cfc460>

In [27]:
# Display the optimised sparse model's parameter table (inducing inputs, RBF
# variance, RBF lengthscale, Gaussian noise variance).
model_sparse

sparse_gp.,value,constraints,priors
inducing inputs,"(10, 14)",,
rbf.variance,0.050718490809828404,+ve,
rbf.lengthscale,2.985430865649114,+ve,
Gaussian_noise.variance,0.4333320047978111,+ve,


### 5. Make Predictions

In [28]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Scale the test features with the scaler already fitted on the training split
# (transform only, no refit).
X_scaled_test = scaler_x.transform(X_test)


#### 5.1 Predict with GP Regression Model

In [29]:
# predict() yields a (mean, variance) pair for the standardised targets.
y_pred_reg_scaled, y_var_reg = model.predict(X_scaled_test)
# Only the mean is mapped back through scaler_y; y_var_reg is left as returned.
y_pred_reg = scaler_y.inverse_transform(y_pred_reg_scaled)
y_pred_reg

array([[ 4.97588068,  5.43833573, 17.12134811],
       [ 4.36074451,  2.42182168,  4.59513669],
       [ 4.02376409,  4.00319297, 21.08715074],
       ...,
       [ 8.9079568 ,  2.85445672, 14.88765401],
       [11.28728314,  4.63910963, 16.00060106],
       [ 7.31107174,  4.25413908, 25.54014959]])

#### 5.2 Predict with Sparse GP Regression Model

In [32]:
# predict() yields a (mean, variance) pair for the standardised targets.
y_pred_sparse_scaled, y_var_sparse = model_sparse.predict(X_scaled_test)
# Only the mean is mapped back through scaler_y; y_var_sparse is left as returned.
y_pred_sparse = scaler_y.inverse_transform(y_pred_sparse_scaled)
y_pred_sparse

array([[ 5.27052331,  4.53216214, 13.72141084],
       [ 8.28050932,  4.85833512, 10.89598265],
       [ 4.59446491,  6.05549976, 23.4967433 ],
       ...,
       [ 7.47589681,  2.92186687, 14.34090323],
       [ 6.46801688,  5.56822106, 12.52550554],
       [ 7.68892204,  3.84708334, 23.56854159]])

### 6. Evaluate the Model

In [33]:
def evaluate_model(y_test_evaluate, y_pred_evaluate):
    """Score predictions against reference values.

    Args:
        y_test_evaluate: Reference target values.
        y_pred_evaluate: Predicted values to compare against the reference.

    Returns:
        A ``(mse, r2, mae)`` tuple: the results of sklearn's
        ``mean_squared_error``, ``r2_score`` and ``mean_absolute_error``,
        each called with its default settings.
    """
    return (
        mean_squared_error(y_test_evaluate, y_pred_evaluate),
        r2_score(y_test_evaluate, y_pred_evaluate),
        mean_absolute_error(y_test_evaluate, y_pred_evaluate),
    )


#### 6.1 Evaluate GP Regression Model

In [34]:
# Score the exact GP's predictions (original units) on the held-out targets.
mse_reg, r2_reg, mae_reg = evaluate_model(y_test, y_pred_reg)
for label, score in zip(("MSE:", "R2:", "MAE:"), (mse_reg, r2_reg, mae_reg)):
    print(label, score)

MSE: 3.876869191487007
R2: 0.8129562744025156
MAE: 0.985031984579637


#### 6.2 Evaluate Sparse GP Regression Model

In [36]:
# Score the sparse GP's predictions (original units) on the held-out targets.
mse_sparse, r2_sparse, mae_sparse = evaluate_model(y_test, y_pred_sparse)
for label, score in zip(("MSE:", "R2:", "MAE:"), (mse_sparse, r2_sparse, mae_sparse)):
    print(label, score)

MSE: 10.226186231277763
R2: 0.582556465156239
MAE: 2.076937317489964


### 7. Visualize results

In [ ]:
from utils.plot_maker_gaussian_process import plot_maker_3d

# Hand the test targets and the exact GP's predictions to plot_maker_3d three
# times: once for the whole test set, then for rows 500-549 and 1000-1049.
for window in (slice(None), slice(500, 550), slice(1000, 1050)):
    plot_maker_3d(y_test[window], y_pred_reg[window], "Ball Trajectory From GP Regression Model")