In [55]:
import math
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import KNNImputer
from sklearn.metrics import accuracy_score
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt

Reading both large datasets

In [56]:
# Targets: one (x, y, z) position per row.
Y = pd.read_csv("data/positions_xyz_128px_full.csv", sep=",", decimal=".")
Y.info()
# Features: (u, v) coordinate pairs (u0/v0, u1/v1, ...); the info() summary
# shows that several of these columns contain missing values.
X = pd.read_csv("data/red_ball_coordinates_128px_full.csv", sep=",", decimal=".")
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25357 entries, 0 to 25356
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   x       25357 non-null  float64
 1   y       25357 non-null  float64
 2   z       25357 non-null  float64
dtypes: float64(3)
memory usage: 594.4 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25357 entries, 0 to 25356
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   u0      24529 non-null  float64
 1   v0      24529 non-null  float64
 2   u1      17650 non-null  float64
 3   v1      17650 non-null  float64
 4   u2      19121 non-null  float64
 5   v2      19121 non-null  float64
 6   u3      22321 non-null  float64
 7   v3      22321 non-null  float64
 8   u4      18223 non-null  float64
 9   v4      18223 non-null  float64
 10  u5      20155 non-null  float64
 11  v5      20155 non-null  float64
 12  u6      21088 non-null  float64
 

Imputing data with KNN imputer

In [57]:
# Fill the missing feature values with a k-nearest-neighbours imputer (k = 6).
# NOTE(review): the imputer is fitted on the full dataset, i.e. before the
# train/test split further down, so test rows influence the values imputed
# for training rows. Consider fitting it on the training split only.
impute_knn = KNNImputer(n_neighbors=6)
impute_knn.fit(X)
# With scikit-learn's default output setting, transform returns a NumPy array,
# so X is no longer a DataFrame after this cell.
X = impute_knn.transform(X)

Imputing data with iterative imputer (alternative approach, currently disabled)

In [58]:
#impute_it = IterativeImputer()
#X = impute_it.fit_transform(X)

Splitting data into training and test sets

In [59]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

Setting parameters for NN

In [60]:
# Keyword arguments forwarded to MLPRegressor when the network is built.
params = dict(
    # Architecture: two hidden layers of 10 units each with ReLU activations.
    hidden_layer_sizes=[10, 10],
    activation='relu',
    # Optimiser: Adam, no L2 penalty (alpha=0), mini-batches of 10 samples.
    solver='adam',
    alpha=0.0,
    batch_size=10,
    # Fixed seed for weight initialisation and batch shuffling.
    random_state=0,
    tol=0.0001,
    # NOTE(review): per the scikit-learn docs this flag only applies to
    # solver='sgd', so it should have no effect with 'adam' — confirm.
    nesterovs_momentum=False,
    learning_rate='constant',
    learning_rate_init=0.01,
    # Stopping: at most 1000 epochs, or 50 epochs without `tol` improvement.
    max_iter=1000,
    shuffle=True,
    n_iter_no_change=50,
    verbose=False,
)

In [61]:
# Build the (still unfitted) regressor from the settings collected in `params`.
net = MLPRegressor(**params)

In [62]:
# Train the network on the training split produced by train_test_split.
net.fit(X_train, y_train)

In [63]:
# Predict positions for the held-out features and wrap the raw output in a
# DataFrame whose columns are named like the target columns (x, y, z).
predicted = pd.DataFrame(net.predict(X_test), columns=["x", "y", "z"])

In [64]:
# Display the predictions (the notebook renders a truncated view of the frame).
predicted

Unnamed: 0,x,y,z
0,5.743055,3.259425,16.922620
1,6.399847,4.535565,8.254164
2,4.124415,4.751838,22.376676
3,3.901072,4.633940,21.099661
4,5.600364,3.580954,17.373684
...,...,...,...
8363,3.342880,2.782362,10.974066
8364,6.399847,4.535565,8.254164
8365,7.374023,2.970358,19.724340
8366,12.133883,-0.721690,22.111061


In [65]:
# Display the actual positions with a fresh 0..n-1 index so the rows line up
# visually with `predicted`. The result is not assigned, so `y_test` itself
# keeps the index labels it received from train_test_split.
y_test.reset_index(drop=True)

Unnamed: 0,x,y,z
0,5.259943,5.538167,16.589180
1,8.371517,1.922154,1.468246
2,3.754902,4.235989,20.924100
3,1.002142,0.272859,25.126870
4,5.817245,5.141342,13.533560
...,...,...,...
8363,1.623831,5.222513,14.224810
8364,5.759055,4.240714,5.749563
8365,6.851942,6.056138,17.986140
8366,8.675189,1.662587,13.930320


# Statistics of this model

Accuracy

In [66]:
# Calculate accuracy
# Disabled: calling accuracy_score on these continuous x/y/z targets raised
# "ValueError: continuous is not supported" (see the output of this cell).
# NOTE(review): a regression metric such as the already-imported
# mean_squared_error is probably what is wanted here — confirm.
#accuracy = accuracy_score(y_test.x, predicted.x)
#print("Accuracy:", accuracy)

ValueError: continuous is not supported

3d scatterplot

In [None]:
# Overlay the predicted and the actual (x, y, z) positions in one 3D scatter
# plot so both point clouds can be compared visually.
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection="3d")
ax.scatter3D(predicted.x, predicted.y, predicted.z, color="green", label="predicted")
ax.scatter3D(y_test.x, y_test.y, y_test.z, color="blue", label="actual")

# Axis labels and a legend let the figure be read without the surrounding code.
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
ax.set_title("simple 3D scatter plot")
ax.legend()

# show plot
plt.show()


Measure, for each axis, the difference between the predicted positions and the actual trajectory

In [68]:
def calculateDifferenceBetweenPredictionAndActualValues(y_test, predicted):
    """Return the signed per-axis error ``predicted - y_test``.

    Rows are matched by position, not by index label, so ``y_test`` may keep
    whatever index it had (for example the shuffled labels left by a
    train/test split).

    Parameters
    ----------
    y_test : pandas.DataFrame
        Actual values; must provide the columns ``x``, ``y`` and ``z``.
    predicted : pandas.DataFrame
        Predicted values; must provide the columns ``x``, ``y`` and ``z``.

    Returns
    -------
    pandas.DataFrame
        Float columns ``x``, ``y``, ``z`` with a fresh ``0..n-1`` index, one
        row per input row.

    Raises
    ------
    ValueError
        If the two frames have a different number of rows; a positional
        comparison would otherwise pair up unrelated rows or drop some.
    """
    axes = ["x", "y", "z"]
    if len(y_test) != len(predicted):
        raise ValueError(
            "y_test and predicted must have the same number of rows "
            "(got %d and %d)" % (len(y_test), len(predicted))
        )
    # Subtract whole columns as NumPy arrays: this ignores index labels and
    # avoids growing the result one row at a time.
    return pd.DataFrame(
        {
            axis: predicted[axis].to_numpy(dtype=float)
            - y_test[axis].to_numpy(dtype=float)
            for axis in axes
        },
        columns=axes,
    )

Calculate the mean absolute deviation between actual and predicted values for each axis

In [69]:
def calculateMeanDeviationByEachAxis(difference):
    """Return the mean absolute deviation for each of the x, y and z axes.

    Parameters
    ----------
    difference : pandas.DataFrame
        Signed per-row errors; must provide the columns ``x``, ``y`` and ``z``.

    Returns
    -------
    pandas.DataFrame
        A single row (index ``0``) with the columns ``x``, ``y``, ``z``, each
        holding the mean of the absolute values of that column. A NaN anywhere
        in a column makes that column's result NaN. For an empty input all
        three values are NaN instead of a division-by-zero error.
    """
    axes = ["x", "y", "z"]
    # Cast to float so the result is numeric even if the input columns are
    # stored with object dtype.
    errors = difference[axes].to_numpy(dtype=float)
    if len(errors) == 0:
        means = np.full(len(axes), np.nan)
    else:
        means = np.abs(errors).mean(axis=0)
    return pd.DataFrame([means], columns=axes)

In [70]:
# Signed per-axis error of every test prediction (predicted minus actual) ...
difference = calculateDifferenceBetweenPredictionAndActualValues(y_test, predicted)
# ... reduced to one mean absolute deviation per axis.
deviation = calculateMeanDeviationByEachAxis(difference)
# Last expression of the cell: render the one-row summary table.
deviation

Unnamed: 0,x,y,z
0,2.129445,2.040267,3.153159
