In [2]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from pykrige.ok import OrdinaryKriging
from pykrige.uk import UniversalKriging
import matplotlib.pyplot as plt

In [4]:
train_data = pd.read_csv('swissrain100.csv')
test_data = pd.read_csv('swissrain367.csv')

X_train = train_data[['x', 'y', 'altitude']].values
y_train = train_data['value'].values

X_test = test_data[['x', 'y', 'altitude']].values
y_test = test_data['value'].values

In [10]:
len(X_train), len(X_test)

(100, 367)

In [20]:
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error

# Perform leave-one-out cross-validation for NN
nn_mse_scores = []
for n in range(1, len(X_train)):
    nn = KNeighborsRegressor(n_neighbors=n)
    loo = LeaveOneOut()
    mse_scores = []
    for train_index, test_index in loo.split(X_train):
        X_train_loo, X_test_loo = X_train[train_index], X_train[test_index]
        y_train_loo, y_test_loo = y_train[train_index], y_train[test_index]
        nn.fit(X_train_loo, y_train_loo)
        y_pred_loo = nn.predict(X_test_loo)
        mse_scores.append(mean_squared_error(y_test_loo, y_pred_loo))
    nn_mse_scores.append(np.mean(mse_scores))

optimal_n_nn = np.argmin(nn_mse_scores) + 1
print(f"Optimal n_neighbors for NN: {optimal_n_nn}")

# Perform leave-one-out cross-validation for IDW
idw_mse_scores = []
for n in range(1, len(X_train)):
    idw = KNeighborsRegressor(n_neighbors=n, weights='distance')
    loo = LeaveOneOut()
    mse_scores = []
    for train_index, test_index in loo.split(X_train):
        X_train_loo, X_test_loo = X_train[train_index], X_train[test_index]
        y_train_loo, y_test_loo = y_train[train_index], y_train[test_index]
        idw.fit(X_train_loo, y_train_loo)
        y_pred_loo = idw.predict(X_test_loo)
        mse_scores.append(mean_squared_error(y_test_loo, y_pred_loo))
    idw_mse_scores.append(np.mean(mse_scores))

optimal_n_idw = np.argmin(idw_mse_scores) + 1
print(f"Optimal n_neighbors for IDW: {optimal_n_idw}")

Optimal n_neighbors for NN: 25
Optimal n_neighbors for IDW: 99


In [17]:
nn = KNeighborsRegressor(n_neighbors=optimal_n_nn)
nn.fit(X_train, y_train)
y_pred_nn = nn.predict(X_test)
mse_nn = mean_squared_error(y_test, y_pred_nn)
print(f"Nearest Neighbor MSE: {mse_nn:.2f}")

Nearest Neighbor MSE: 12630.28


In [19]:
idw = KNeighborsRegressor(n_neighbors=4, weights='distance')
idw.fit(X_train, y_train)
y_pred_idw = idw.predict(X_test)
mse_idw = mean_squared_error(y_test, y_pred_idw)
print(f"IDW MSE: {mse_idw:.2f}")

IDW MSE: 14515.69
