In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [20]:
# Read the dataset from disk into the DataFrame `df`.
# The file is expected to provide the feature columns 'NDVI' and 'NBR'
# plus a 'label' column, which later cells use as the target.
df = pd.read_csv(filepath_or_buffer='data_file.csv')

In [21]:
# Separate the target column from the two feature columns
y = df['label']
X = df[['NDVI', 'NBR']]

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across kernel restarts
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [22]:
# Baseline classifier: k-nearest neighbours with k=5, fitted on the training split
knn = KNeighborsClassifier(
    n_neighbors=5,
)
knn.fit(X=X_train, y=y_train)

In [23]:
# Predict a label for every row of the held-out test features
y_pred = knn.predict(X=X_test)

In [24]:
from sklearn.metrics import accuracy_score

# Compare the test-set predictions against the true labels:
# overall accuracy first, then the per-class report and the confusion matrix.
print(
    "Accuracy:",
    accuracy_score(y_test, y_pred),
)
# sep="\n" puts the report and the matrix on separate lines, exactly as two
# consecutive print() calls would.
print(
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)


Accuracy: 0.9285714285714286
              precision    recall  f1-score   support

           0       1.00      0.86      0.92         7
           1       0.88      1.00      0.93         7

    accuracy                           0.93        14
   macro avg       0.94      0.93      0.93        14
weighted avg       0.94      0.93      0.93        14

[[6 1]
 [0 7]]


In [25]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter candidates for the KNN model:
# 4 neighbour counts x 2 weighting schemes x 2 distance metrics = 16 combinations
param_grid = dict(
    n_neighbors=[3, 5, 7, 9],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Exhaustive search over the grid with 3-fold cross-validation on the
# training split, using all available cores (n_jobs=-1)
grid_search = GridSearchCV(
    knn,
    param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Keep both the winning estimator and its parameter combination
best_knn = grid_search.best_estimator_
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
Best Parameters: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}


In [26]:
from sklearn.inspection import permutation_importance

# Permutation importance of each feature on the held-out test set:
# every feature column is shuffled 30 times (n_repeats) and the resulting
# change in the model's score is recorded. The fixed random_state keeps the
# shuffles reproducible.
# NOTE(review): this inspects `knn` (the n_neighbors=5 model fitted earlier),
# not `best_knn` from the grid search -- confirm that is the intended model.
result = permutation_importance(knn, X_test, y_test, n_repeats=30, random_state=42, n_jobs=-1)

# Mean importance and its standard deviation across repeats, one entry per feature
importances = result.importances_mean
std = result.importances_std

# Display the importances. Feature names are taken from X_test itself so the
# labels always match the columns that were actually permuted, and the loop
# uses its own variable for the per-feature deviation so the `std` array is
# not overwritten by a scalar.
for feature, importance, feature_std in zip(X_test.columns, importances, std):
    print(f"Feature: {feature}, Importance: {importance} ± {feature_std}")


Feature: NDVI, Importance: 0.27380952380952384 ± 0.09050751337499148
Feature: NBR, Importance: 0.18333333333333343 ± 0.08182304894018845
