## Trajectory prediction for each dimension separately
## With Linear Regression

#### Read the test data

In [1]:
import pandas as pd

red_ball_fill = pd.read_csv('../test-data/red_ball_coordinates_128px_2024-02-27_13h38m.csv', delimiter=',', decimal='.')
red_ball_fill

Unnamed: 0,u0,v0,u1,v1,u2,v2,u3,v3,u4,v4,u5,v5,u6,v6
0,36.0,40.0,125.0,41.0,33.5,84.0,78.0,83.0,126.0,85.0,,,69.0,121.0
1,36.0,41.0,124.5,42.0,33.0,85.0,77.5,84.0,126.0,87.0,,,69.0,120.5
2,36.0,42.0,124.0,44.0,33.0,86.0,77.5,85.0,126.0,88.0,,,69.0,120.5
3,36.0,43.5,124.0,45.0,33.0,87.0,77.0,86.0,125.5,89.0,,,69.0,120.0
4,36.0,45.0,123.0,46.0,33.0,89.0,77.0,87.0,126.0,91.0,,,69.0,120.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1469,88.0,6.0,56.0,24.0,83.5,54.0,12.0,72.0,60.5,70.0,74.0,98.0,68.5,118.5
1470,88.5,6.0,56.5,24.0,83.5,54.0,11.5,71.0,60.5,70.0,74.0,98.0,68.5,118.5
1471,88.0,7.0,56.5,24.0,83.5,54.0,11.0,71.5,60.5,70.0,74.0,98.0,68.5,118.5
1472,88.0,7.0,56.5,24.0,83.5,54.5,11.0,71.5,60.5,70.0,74.0,98.0,68.5,118.5


#### Split the test data into NaN and non-NaN rows

In [2]:
red_ball_nan_rows = red_ball_fill[red_ball_fill.u0.isna() & red_ball_fill.v0.isna()]
red_ball_nan_rows

Unnamed: 0,u0,v0,u1,v1,u2,v2,u3,v3,u4,v4,u5,v5,u6,v6
360,,,69.0,21.0,,,26.0,66.0,71.0,66.0,,,82.0,112.0
361,,,68.0,22.0,,,26.0,68.0,71.0,67.0,,,82.0,114.0
362,,,69.0,23.0,,,26.0,69.0,71.0,68.0,,,82.0,115.0
366,,,68.0,27.0,,,26.0,71.5,70.0,71.0,,,82.0,118.0
367,,,68.0,27.0,,,26.0,72.0,70.0,72.0,,,82.0,120.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,,,86.0,23.0,,,43.0,68.0,87.0,68.0,,,101.0,115.0
740,,,86.0,23.0,,,43.0,69.0,88.0,69.0,,,101.0,116.0
744,,,86.0,27.0,,,43.0,73.0,88.0,72.5,,,102.0,120.5
745,,,86.0,29.0,,,43.5,73.0,88.0,73.0,,,102.0,121.0


#### Read the training data

In [3]:
red_ball_all = pd.read_csv('../data/red_ball_coordinates_128px_full.csv', delimiter=',', decimal='.')
red_ball_all_test = pd.read_csv('../test-data/red_ball_coordinates_128px_full.csv', delimiter=',', decimal='.')
red_ball_all = pd.concat([red_ball_all, red_ball_all_test])
red_ball_non_nan_rows = red_ball_all[~red_ball_all.index.isin(red_ball_nan_rows.index)]
# drop all the rows where u0 and v0 are NaN
red_ball_non_nan_rows = red_ball_non_nan_rows.dropna()
red_ball_non_nan_rows

Unnamed: 0,u0,v0,u1,v1,u2,v2,u3,v3,u4,v4,u5,v5,u6,v6
95,65.714286,81.535714,106.952381,82.785714,55.0,23.5,68.411765,37.617647,55.000000,128.000000,59.0,69.0,68.500000,118.166667
96,66.312500,81.062500,106.972222,81.638889,55.0,23.5,68.235294,37.029412,54.727273,127.454545,59.0,69.0,68.500000,118.166667
97,66.250000,79.750000,107.400000,80.900000,55.0,23.5,68.750000,35.750000,55.347826,126.782609,59.0,69.0,68.500000,118.166667
98,66.411765,79.382353,108.255814,80.116279,55.0,23.5,68.750000,35.250000,55.928571,126.642857,59.0,69.0,68.500000,118.166667
99,66.625000,78.125000,108.581395,79.209302,55.0,23.5,69.470588,33.676471,56.542857,125.542857,59.0,69.0,68.428571,119.428571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4130,5.500000,1.000000,114.500000,1.500000,4.0,55.0,61.000000,56.000000,120.000000,55.000000,1.0,112.0,124.000000,113.000000
4131,5.000000,2.000000,114.000000,2.000000,4.0,55.0,60.000000,56.000000,119.000000,55.000000,1.0,113.0,123.000000,113.000000
4132,5.000000,2.000000,114.000000,2.000000,4.0,56.0,60.000000,56.000000,119.000000,56.000000,0.0,113.0,123.000000,114.000000
4133,5.000000,2.000000,113.000000,2.000000,4.0,57.0,60.000000,57.000000,119.000000,57.000000,0.5,114.0,123.000000,114.000000


Training data is u1, v1, u2, v2, ... u6, v6
Test data is u0, v0
The AI model is to predict the NaN values in the test data so the missing values are predicted

In [4]:
X_train = red_ball_non_nan_rows.drop(['u0', 'v0'], axis=1)
y_train = red_ball_non_nan_rows[['u0', 'v0']]
X_test = red_ball_nan_rows.drop(['u0', 'v0'], axis=1)

In [5]:
from sklearn.impute import KNNImputer

imputer_knn = KNNImputer(n_neighbors=2)
#X_train_imputed = imputer_knn.fit_transform(X_train)
X_test_imputed = imputer_knn.fit_transform(X_test)

In [6]:
X_train

Unnamed: 0,u1,v1,u2,v2,u3,v3,u4,v4,u5,v5,u6,v6
95,106.952381,82.785714,55.0,23.5,68.411765,37.617647,55.000000,128.000000,59.0,69.0,68.500000,118.166667
96,106.972222,81.638889,55.0,23.5,68.235294,37.029412,54.727273,127.454545,59.0,69.0,68.500000,118.166667
97,107.400000,80.900000,55.0,23.5,68.750000,35.750000,55.347826,126.782609,59.0,69.0,68.500000,118.166667
98,108.255814,80.116279,55.0,23.5,68.750000,35.250000,55.928571,126.642857,59.0,69.0,68.500000,118.166667
99,108.581395,79.209302,55.0,23.5,69.470588,33.676471,56.542857,125.542857,59.0,69.0,68.428571,119.428571
...,...,...,...,...,...,...,...,...,...,...,...,...
4130,114.500000,1.500000,4.0,55.0,61.000000,56.000000,120.000000,55.000000,1.0,112.0,124.000000,113.000000
4131,114.000000,2.000000,4.0,55.0,60.000000,56.000000,119.000000,55.000000,1.0,113.0,123.000000,113.000000
4132,114.000000,2.000000,4.0,56.0,60.000000,56.000000,119.000000,56.000000,0.0,113.0,123.000000,114.000000
4133,113.000000,2.000000,4.0,57.0,60.000000,57.000000,119.000000,57.000000,0.5,114.0,123.000000,114.000000


#### Train the models

In [7]:
from sklearn.linear_model import LinearRegression
import numpy as np

In [8]:
model_x = LinearRegression()
model_x.fit(X_train, y_train.u0)

In [9]:
y_pred_x = model_x.predict(X_test_imputed)



ValueError: X has 8 features, but LinearRegression is expecting 12 features as input.

In [None]:
model_y = LinearRegression()
model_y.fit(X_train, y_train.v0)

In [None]:
y_pred_y = model_y.predict(X_test_imputed)

#### Plot the imputed values

In [None]:
nan_order = red_ball_nan_rows.index.values
non_nan_order = red_ball_fill.index.values

In [None]:
from matplotlib import pyplot as plt

plt.figure(figsize=(10, 5))
plt.scatter(non_nan_order, red_ball_fill.u0, color='blue', label='Non-NaN Values')
plt.scatter(nan_order, y_pred_x, color='red', marker='x', label='Imputed Values')
plt.xlabel('Order')
plt.ylabel('u0')
plt.title('Imputed u0 Values Over Order')
plt.legend()
plt.show()


In [None]:
from matplotlib import pyplot as plt

plt.figure(figsize=(10, 5))
plt.scatter(non_nan_order, red_ball_fill.v0, color='blue', label='Non-NaN Values')
plt.scatter(nan_order, y_pred_y, color='red', marker='x', label='Imputed Values')
plt.xlabel('Order')
plt.ylabel('u0')
plt.title('Imputed u0 Values Over Order')
plt.legend()
plt.show()


In [None]:
y_pred = np.column_stack((y_pred_x, y_pred_y))

In [None]:
import matplotlib.pyplot as plt

# make a 2d plot with the old values in blue and the new in red
plt.scatter(red_ball_fill.u0, red_ball_fill.v0, color='blue')
plt.scatter(y_pred[:, 0], y_pred[:, 1], color='red')
plt.xlim(0, 128)
plt.ylim(0, 128)
plt.show()