In [None]:
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold, cross_val_score

from utils.data_generation import generate_vectors, generate_scalars
from benchmark_functions.sphere import sphere_func

In [None]:
# Seed NumPy's legacy global RNG so that later np.random draws in this
# notebook (e.g. the training noise) are repeatable across runs.
random_seed = 1000
np.random.seed(random_seed)

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration: every tunable value lives in this cell.
# ---------------------------------------------------------------------------

# data generation configuration
# NOTE: the spelling "input_dimention" is kept because other cells use it.
input_dimention = 4   # forwarded to generate_vectors as the vector dimension
domain = [-5, 5]      # forwarded to generate_vectors; presumably [lower, upper] bounds - confirm in utils.data_generation

# noise configuration (parameters of the Gaussian noise added to training targets)
add_noise = True
noise_mean = 0
noise_std_deviation = 0.5

# data set configuration
data_set_size = 10_000_000
# NOTE(review): training_set_fraction is not referenced by the visible cells;
# only test_set_fraction is handed to train_test_split. The check below keeps
# the two values from silently drifting apart.
training_set_fraction = 0.85
test_set_fraction = 0.15

# model configuration
neighbour_count = 3   # n_neighbors of the KNeighborsRegressor

# Fail fast on an inconsistent configuration instead of running an expensive
# experiment with settings that do not mean what the reader expects.
assert input_dimention >= 1, "input_dimention must be a positive integer"
assert data_set_size > 0, "data_set_size must be positive"
assert noise_std_deviation >= 0, "noise_std_deviation must be non-negative"
assert 0 < test_set_fraction < 1, "test_set_fraction must lie strictly between 0 and 1"
assert abs(training_set_fraction + test_set_fraction - 1.0) < 1e-9, (
    "training_set_fraction and test_set_fraction must sum to 1"
)
assert neighbour_count >= 1, "neighbour_count must be at least 1"

In [None]:
# Build the complete data set: generate the input vectors, then derive the
# target scalars from them (presumably by evaluating sphere_func on each
# vector - see utils.data_generation).
data_set_vectors = generate_vectors(input_dimention, domain, data_set_size)
data_set_scalars = generate_scalars(data_set_vectors, sphere_func)

# Hold out test_set_fraction of the samples for testing; the fixed
# random_state keeps the partition identical from run to run.
(
    training_set_vectors,
    test_set_vectors,
    training_set_scalars,
    test_set_scalars,
) = train_test_split(
    data_set_vectors,
    data_set_scalars,
    test_size=test_set_fraction,
    random_state=42,
)

# Gaussian noise is applied to the training targets only; the test targets
# are left untouched. training_noise stays an empty list when noise is off.
training_noise = []

if add_noise:
    training_noise = np.random.normal(
        loc=noise_mean,
        scale=noise_std_deviation,
        size=len(training_set_scalars),
    )
    # In-place addition: training_set_scalars itself is modified.
    training_set_scalars += training_noise

## Data pre-processing

In [None]:
# Fit both min-max scalers on the training split only, so the test split has
# no influence on the scaling bounds.
vector_scaler = MinMaxScaler()
vector_scaler.fit(training_set_vectors)

# MinMaxScaler expects 2-D input, hence the reshape to a single column.
scalar_scaler = MinMaxScaler()
scalar_scaler.fit(training_set_scalars.reshape(-1, 1))

In [None]:
# Apply the training-fitted vector scaler to both splits.
# NOTE: the names are overwritten, so re-running this cell rescales data that
# is already scaled - re-run the data generation cell first.
training_set_vectors, test_set_vectors = (
    vector_scaler.transform(training_set_vectors),
    vector_scaler.transform(test_set_vectors),
)

In [None]:
# Apply the training-fitted scalar scaler to both target arrays; each is
# reshaped to a single column first, so the results have shape (n, 1).
# NOTE: the names are overwritten, so re-running this cell rescales targets
# that are already scaled - re-run the data generation cell first.
training_set_scalars, test_set_scalars = (
    scalar_scaler.transform(training_set_scalars.reshape(-1, 1)),
    scalar_scaler.transform(test_set_scalars.reshape(-1, 1)),
)

# K-Nearest Neighbors

In [None]:
# k-nearest-neighbours regressor; k is taken from the configuration cell.
model = KNeighborsRegressor(n_neighbors=neighbour_count)

In [None]:
# Shuffled 5-fold cross-validation over the training split, with a fixed seed
# so the folds are reproducible.
# NOTE(review): both scalers were fitted on the whole training split before
# this step, so each validation fold has already influenced the scaling.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

scores = cross_val_score(
    estimator=model,
    X=training_set_vectors,
    y=training_set_scalars,
    scoring="neg_mean_squared_error",
    cv=kf,
)

# The scorer reports negated MSE, so flip the sign to obtain losses. The
# targets were min-max scaled, so the MSE is expressed in scaled units.
validation_losses = np.negative(scores)
print(f"Mean MSE: {validation_losses.mean():.6f}")

In [None]:
# Final fit on the full (scaled) training split.
model.fit(X=training_set_vectors, y=training_set_scalars)

In [None]:
# Predict the (still scaled) targets for the held-out test vectors.
test_set_predictions = model.predict(X=test_set_vectors)

In [None]:
# Map predictions and true test targets back to the original target scale so
# the errors below are reported in the original units.
# NOTE: the names are overwritten, so re-running this cell applies the inverse
# transform a second time - re-run the preceding cells first.
test_set_predictions, test_set_scalars = (
    scalar_scaler.inverse_transform(test_set_predictions),
    scalar_scaler.inverse_transform(test_set_scalars),
)

In [None]:
# Element-wise absolute deviation between the predictions and the true test
# targets (both already mapped back to the original scale).
absolute_errors = np.abs(test_set_predictions - test_set_scalars)

# Summary statistics of the absolute error on the test set.
min_error = absolute_errors.min()
max_error = absolute_errors.max()
error_mean = absolute_errors.mean()
error_std = absolute_errors.std()

# Report: min, max, mean, standard deviation.
print(f"Test set absolute error min: {min_error:.8f}")
print(f"Test set absolute error max: {max_error:.8f}")
print(f"Test set absolute error mean: {error_mean:.8f}")
print(f"Test set absolute error standard deviation: {error_std:.8f}")