# Training neural network to predict ball position in a 3d space
## 1. Importing Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from utils.evaluate import evaluate_model
from utils.plot_maker import plot_maker_3d

## 2. Importing Data

In [2]:
# get all the red ball data
X_rec1 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_08h55m.csv', decimal='.', delimiter=',')
X_rec2 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_08h56m.csv', decimal='.', delimiter=',')
X_rec3 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h16m.csv', decimal='.', delimiter=',')
X_rec4 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h49m.csv', decimal='.', delimiter=',')
X_rec5 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h51m.csv', decimal='.', delimiter=',')
X_rec6 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h52m.csv', decimal='.', delimiter=',')
X_rec7 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h53m.csv', decimal='.', delimiter=',')
X_rec8 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h54m.csv', decimal='.', delimiter=',')
X_rec9 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h55m.csv', decimal='.', delimiter=',')
X_rec10 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h58m.csv', decimal='.', delimiter=',')
X_rec_11 = pd.read_csv('../data-12-cams/red_ball_coordinates_128px_2024-03-20_09h59m.csv', decimal='.', delimiter=',')

In [3]:
# get all the positions data
y_rec1 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_08h55m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec2 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_08h56m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec3 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h16m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec4 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h49m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec5 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h51m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec6 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h52m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec7 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h53m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec8 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h54m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec9 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h55m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec10 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h58m.csv', decimal='.', delimiter=',').iloc[4:]
y_rec_11 = pd.read_csv('../data-12-cams/positions_xyz_2024-03-20_09h59m.csv', decimal='.', delimiter=',').iloc[4:]

In [4]:
(X_rec1.shape, y_rec1.shape), (X_rec2.shape, y_rec2.shape), (X_rec3.shape, y_rec3.shape), (X_rec4.shape, y_rec4.shape), (X_rec5.shape, y_rec5.shape), (X_rec6.shape, y_rec6.shape), (X_rec7.shape, y_rec7.shape), (X_rec8.shape, y_rec8.shape), (X_rec9.shape, y_rec9.shape), (X_rec10.shape, y_rec10.shape), (X_rec_11.shape, y_rec_11.shape)

(((979, 24), (979, 3)),
 ((999, 24), (999, 3)),
 ((999, 24), (999, 3)),
 ((1000, 24), (1000, 3)),
 ((1014, 24), (1014, 3)),
 ((1017, 24), (1017, 3)),
 ((1011, 24), (1011, 3)),
 ((1066, 24), (1066, 3)),
 ((1000, 24), (1000, 3)),
 ((1014, 24), (1014, 3)),
 ((1003, 24), (1003, 3)))

In [5]:
print(f"The amount of NaN in rec1: {X_rec1.isna().sum().sum()}")
print(f"The amount of NaN in rec2: {X_rec2.isna().sum().sum()}")
print(f"The amount of NaN in rec3: {X_rec3.isna().sum().sum()}")
print(f"The amount of NaN in rec4: {X_rec4.isna().sum().sum()}")
print(f"The amount of NaN in rec5: {X_rec5.isna().sum().sum()}")
print(f"The amount of NaN in rec6: {X_rec6.isna().sum().sum()}")
print(f"The amount of NaN in rec7: {X_rec7.isna().sum().sum()}")
print(f"The amount of NaN in rec8: {X_rec8.isna().sum().sum()}")
print(f"The amount of NaN in rec9: {X_rec9.isna().sum().sum()}")
print(f"The amount of NaN in rec10: {X_rec10.isna().sum().sum()}")
print(f"The amount of NaN in rec11: {X_rec_11.isna().sum().sum()}")

The amount of NaN in rec1: 0
The amount of NaN in rec2: 10
The amount of NaN in rec3: 2152
The amount of NaN in rec4: 8
The amount of NaN in rec5: 8320
The amount of NaN in rec6: 8340
The amount of NaN in rec7: 10
The amount of NaN in rec8: 80
The amount of NaN in rec9: 2
The amount of NaN in rec10: 0
The amount of NaN in rec11: 18


### 2.3 Combine all the datasets

In [6]:
X_rec3.isna().sum()

u0     129
v0     129
u1       0
v1       0
u2       0
v2       0
u3     130
v3     130
u4     132
v4     132
u5     129
v5     129
u6     130
v6     130
u7     133
v7     133
u8     128
v8     128
u9     133
v9     133
u10     32
v10     32
u11      0
v11      0
dtype: int64

In [7]:
combo_1.dropna(inplace=True, thresh=31)
combo_2.dropna(inplace=True, thresh=31)
combo_3.dropna(inplace=True, thresh=31)
combo_4.dropna(inplace=True, thresh=31)
combo_5.dropna(inplace=True, thresh=31)
combo_6.dropna(inplace=True, thresh=31)
combo_7.dropna(inplace=True, thresh=31)
combo_8.dropna(inplace=True, thresh=31)
combo_9.dropna(inplace=True, thresh=31)
combo_10.dropna(inplace=True, thresh=31)

NameError: name 'combo_1' is not defined

In [None]:
def splitup(combined_df):
    X_df = combined_df.iloc[:, :14]
    y_df = combined_df.iloc[:, 14:]
    return X_df, y_df

X_rec1, y_rec1 = splitup(combo_1)
X_rec2, y_rec2 = splitup(combo_2)
X_rec3, y_rec3 = splitup(combo_3)
X_rec4, y_rec4 = splitup(combo_4)
X_rec5, y_rec5 = splitup(combo_5)
X_rec6, y_rec6 = splitup(combo_6)
X_rec7, y_rec7 = splitup(combo_7)
X_rec8, y_rec8 = splitup(combo_8)
X_rec9, y_rec9 = splitup(combo_9)
X_rec10, y_rec10 = splitup(combo_10)

In [None]:
from utils.interpolation import interpolate_dataframe

X_rec1 = interpolate_dataframe(X_rec1)
X_rec2 = interpolate_dataframe(X_rec2)
X_rec3 = interpolate_dataframe(X_rec3)
X_rec4 = interpolate_dataframe(X_rec4)
X_rec5 = interpolate_dataframe(X_rec5)
X_rec6 = interpolate_dataframe(X_rec6)
X_rec7 = interpolate_dataframe(X_rec7)
X_rec8 = interpolate_dataframe(X_rec8)
X_rec9 = interpolate_dataframe(X_rec9)
X_rec10 = interpolate_dataframe(X_rec10)

In [None]:
X_train = pd.concat([X_rec1, X_rec2, X_rec3, X_rec4, X_rec5, X_rec6, X_rec7, X_rec8, X_rec9, X_rec10])
y_train = pd.concat([y_rec1, y_rec2, y_rec3, y_rec4, y_rec5, y_rec6, y_rec7, y_rec8, y_rec9, y_rec10])
X_train.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)

In [None]:
X_train.isna().sum()

In [None]:
X_train.shape, y_train.shape

In [None]:
combined = pd.concat([X_train, y_train], axis=1)

## 3. Data Preprocessing

In [None]:
combined

In [None]:
# check for missing values
print(f'The data has missing values: {combined.isna().sum().sum()}')
# drop all rows that have more then 3 NaN values
combined.dropna(inplace=True)
print(f'The data has missing values: {combined.isna().sum().sum()}')

In [None]:
X = combined.iloc[:, :32]
y = combined.iloc[:, 32:]

In [None]:
X.shape, y.shape

In [None]:
# scale the data
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

## 4. Hyperparameter Tuning

In [None]:
# create a pipeline
pipeline = Pipeline([
    ('mlp', MLPRegressor(random_state=42))
])

In [None]:
# Best parameters: {'mlp__solver': 'adam', 'mlp__max_iter': 28000, 'mlp__learning_rate_init': 0.01, 'mlp__learning_rate': 'adaptive', 'mlp__hidden_layer_sizes': (50, 100, 50), 'mlp__early_stopping': True, 'mlp__alpha': 0.001, 'mlp__activation': 'relu'}
#model = MLPRegressor(random_state=42, max_iter=28000, learning_rate_init=0.01, learning_rate='adaptive', hidden_layer_sizes=(50, 100, 50), early_stopping=True, alpha=0.001, activation='relu')

param_grid = {
    'mlp__hidden_layer_sizes': 
        [(50, 100, 50) # ,(100,), (64, 32, 64)],
         ],
    'mlp__activation': ['identity', 'tanh', 'relu'],
    'mlp__solver': ['sgd', 'adam'],
    'mlp__alpha': [0.001, 0.01, 0.1],
    'mlp__max_iter': [10000, 20000, 28000],
    'mlp__learning_rate': ['constant', 'adaptive'],
    'mlp__learning_rate_init': [0.001, 0.01, 0.1],
    'mlp__early_stopping': [True],
}

random_search = RandomizedSearchCV(pipeline, param_distributions=param_grid, cv=5, n_iter=100, n_jobs=-1, verbose=3, random_state=42)

random_search.fit(X_scaled, y_scaled)

In [None]:
print(f'Best parameters: {random_search.best_params_}')
print(f'Best score: {random_search.best_score_}')

In [None]:
model = random_search.best_estimator_

## 5. Training the Model

In [None]:
model.fit(X_scaled, y_scaled)

## 6. Make Predictions On Test Data

In [None]:
import pandas as pd

In [None]:
X_test_8_a = pd.read_csv('../data-16-cams/red_ball_coordinates_128px_2024-03-19_17h30m.csv', decimal='.', delimiter=',')
X_test_8_b = pd.read_csv('../data-16-cams/red_ball_coordinates_128px_2024-03-19_17h31m.csv', decimal='.', delimiter=',')

In [None]:
y_test_8_a = pd.read_csv('../data-16-cams/positions_xyz_2024-03-19_17h30m.csv', decimal='.', delimiter=',').iloc[:-2]
y_test_8_b = pd.read_csv('../data-16-cams/positions_xyz_2024-03-19_17h31m.csv', decimal='.', delimiter=',').iloc[:-2]

In [None]:
(X_test_8_a.shape, y_test_8_a.shape), (X_test_8_b.shape, y_test_8_b.shape)

In [None]:
combo_test_a = pd.concat([X_test_8_a, y_test_8_a], axis=1)
combo_test_b = pd.concat([X_test_8_b, y_test_8_b], axis=1)

In [None]:
#combo_test_a.dropna(inplace=True, thresh=31)
#combo_test_b.dropna(inplace=True, thresh=31)

In [None]:
X_test_8_a, y_test_8_a = splitup(combo_test_a)
X_test_8_b, y_test_8_b = splitup(combo_test_b)

In [None]:
X_test_8_a = interpolate_dataframe(X_test_8_a)
X_test_8_b = interpolate_dataframe(X_test_8_b)

In [None]:
(X_test_8_a.shape, y_test_8_a.shape), (X_test_8_b.shape, y_test_8_b.shape)

In [None]:
def prepare_and_scale_test_data(X_test_val, y_test_val, scaler_x_test):
    combined_test = pd.concat([X_test_val, y_test_val], axis=1)
    combined_test.dropna(inplace=True)
    X_val = combined_test.iloc[:, :32]
    y_val = combined_test.iloc[:, 32:]
    X_scaled_test = scaler_x_test.transform(X_val)
    return X_scaled_test, y_val

In [None]:
def predict_evaluate_plot(X_test_val_scaled, y_test_val, nn_model, scaler_y_test, title='Trajectory of Ball in 3D Space'):
    y_test_predicted_scaled = nn_model.predict(X_test_val_scaled)
    y_test_pred = scaler_y_test.inverse_transform(y_test_predicted_scaled)
    mse, mae, r2 = evaluate_model(y_test_val, y_test_pred, 'Test Data')
    # change col 1 and 2 from place, so it corresponds to the unity 3d space
    y_test_pred = y_test_pred[:, [0, 2, 1]]
    y_test_val = y_test_val.to_numpy()[:, [0, 2, 1]]
    plot_maker_3d(y_test_val, y_test_pred, title)
    return y_test_pred, mse, mae, r2

In [None]:
X_test_8_scaled_a, y_test_8_a = prepare_and_scale_test_data(X_test_8_a, y_test_8_a, scaler_x)
X_test_8_pred_a, mse_8_a, mae_8_a, r2_8_a = predict_evaluate_plot(X_test_8_scaled_a, y_test_8_a, model, scaler_y, title='Trajectory of Ball in 3D Space - 8-motion a')

In [None]:
X_test_8_scaled_b, y_test_8_b = prepare_and_scale_test_data(X_test_8_b, y_test_8_b, scaler_x)
X_test_8_pred_b, mse_8_b, mae_8_b, r2_8_b = predict_evaluate_plot(X_test_8_scaled_b, y_test_8_b, model, scaler_y, title='Trajectory of Ball in 3D Space - 8-motion b')