In [1]:
# This code conducts a grid search over a predefined set of hyperparameters for the KNN model,
# evaluates the performance using cross-validation, and identifies the best combination of hyperparameters.
# The model is then retrained with these optimal parameters and evaluated on the test set.

import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the dataset
data = pd.read_csv('../data/training_data.csv')

# Preparing the data: every column except the target is used as a feature
X = data.drop('increase_stock', axis=1)
y = data['increase_stock']

# Splitting the data into training and testing sets (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# K-Nearest Neighbors classifier, wrapped together with the scaler in a pipeline.
# Why a pipeline: GridSearchCV re-fits the whole pipeline on each CV training fold,
# so the scaler's mean/std are computed without seeing that fold's validation rows.
# Scaling once up front (before the search) leaks validation data into the
# preprocessing and makes the cross-validated scores optimistic.
knn = KNeighborsClassifier()
knn_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', knn),
])

# Parameters for grid search (plain KNN parameter names)
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}
# Pipeline steps are addressed as '<step>__<param>', so prefix each key for the search
pipeline_param_grid = {f'knn__{name}': values for name, values in param_grid.items()}

# Grid search for hyperparameter tuning (5-fold CV, selected by accuracy)
grid_search = GridSearchCV(knn_pipeline, pipeline_param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters, with the 'knn__' step prefix stripped so the reported keys
# are the plain KNeighborsClassifier argument names
best_params = {
    name.split('__', 1)[1]: value
    for name, value in grid_search.best_params_.items()
}

# GridSearchCV (refit=True by default) has already re-fitted the best pipeline on
# the full training set, so reuse its fitted steps instead of training again.
best_pipeline = grid_search.best_estimator_
scaler = best_pipeline.named_steps['scaler']
knn_best = best_pipeline.named_steps['knn']

# Scaled copies of both splits, produced by the scaler fitted on the training set only
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Predictions and evaluation on the held-out test set
y_pred = knn_best.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Best Parameters:", best_params)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Best Parameters: {'metric': 'manhattan', 'n_neighbors': 11, 'weights': 'distance'}
Accuracy: 0.859375
Classification Report:
                   precision    recall  f1-score   support

high_bike_demand       0.56      0.46      0.51        50
 low_bike_demand       0.90      0.93      0.92       270

        accuracy                           0.86       320
       macro avg       0.73      0.70      0.71       320
    weighted avg       0.85      0.86      0.85       320

