In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Plot appearance: matplotlib style sheet first, then the seaborn palette
# (order kept exactly as in the original).
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("viridis")

# Read the preprocessed dataset, parse the occurrence date and derive its year.
df = pd.read_csv("preprocessed_crime_data.csv")
occurred_on = pd.to_datetime(df['DATE OCC'])
df['DATE OCC'] = occurred_on
df['Year'] = occurred_on.dt.year

# Model inputs (feature columns) and the label column.
feature_cols = [
    'Rpt Dist No',
    'LAT',
    'LON',
    'TIME OCC',
    'Day of Week',
    'Vict Age',
    'Premis Cd',
]
X = df[feature_cols]
y = df['Target']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run.
from sklearn.model_selection import train_test_split
holdout = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = holdout

# Candidate neighbourhood sizes to evaluate - expanded range for better visualization
neighbors_list = [1, 2, 3, 5, 7, 9, 11, 15, 19, 25]
results = []       # one metrics dict per evaluated k
y_pred_all = {}    # k -> predicted labels for X_test
probas_all = {}    # k -> second predict_proba column for X_test, or None

# Fit and score one scaled, distance-weighted KNN model per k.
# NOTE(review): every k is scored on the same held-out test split, so any
# "best k" picked from these numbers is selected on test data.
for n_neighbors in neighbors_list:
    # Scaling lives inside the pipeline, so the scaler is fitted on the
    # training rows only and merely applied to the test rows.
    knn = Pipeline([
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance'))
    ])
    knn.fit(X_train, y_train)

    # Hard label predictions
    y_pred = knn.predict(X_test)
    y_pred_all[n_neighbors] = y_pred

    # Second probability column, when it exists. Only the failures this
    # expression can plausibly raise are caught: a final estimator without
    # predict_proba (AttributeError) or a single-column probability matrix
    # (IndexError). Anything else propagates instead of being hidden.
    try:
        probas = knn.predict_proba(X_test)[:, 1]
    except (AttributeError, IndexError):
        probas = None
    probas_all[n_neighbors] = probas

    # Aggregate metrics
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    # Pin the matrix to the classes seen during fitting so its shape does not
    # shrink when a class is absent from both y_test and y_pred.
    cm = confusion_matrix(y_test, y_pred, labels=knn.classes_)

    # Binary-only breakdown: unpacking four cells assumes exactly two classes.
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

    # Per-class entry for label '1'; an empty dict makes the metrics fall
    # back to 0 when that label is not present in the report.
    class1 = report.get('1', {})

    results.append({
        "Model": f"KNN (k={n_neighbors})",
        "n_neighbors": n_neighbors,
        "Accuracy": round(accuracy, 5),
        "Precision (Class 1)": round(class1.get('precision', 0), 5),
        "Recall (Class 1)": round(class1.get('recall', 0), 5),
        "F1 Score (Class 1)": round(class1.get('f1-score', 0), 5),
        "Specificity": round(specificity, 5),
        "TP": tp,
        "FP": fp,
        "TN": tn,
        "FN": fn
    })

# Collect the per-k metric dicts into a table ordered by accuracy, highest
# first, with a fresh 0..n-1 index.
results_df = pd.DataFrame(results).sort_values(
    by='Accuracy', ascending=False, ignore_index=True
)

# Summary table restricted to the headline metrics.
display_cols = [
    "Model",
    "Accuracy",
    "Precision (Class 1)",
    "Recall (Class 1)",
    "F1 Score (Class 1)",
    "Specificity",
]
print("\n===== KNN Classifier Results (Sorted by Accuracy) =====")
print(results_df[display_cols].to_string(index=False))

# After sorting, the first row is the configuration with the highest accuracy.
best_model = results_df.iloc[0]
print("\n===== Best Model Details =====")
print(f"Best k value: {int(best_model['n_neighbors'])}")
for metric_name in (
    "Accuracy",
    "F1 Score (Class 1)",
    "Precision (Class 1)",
    "Recall (Class 1)",
    "Specificity",
):
    # Each printed label is identical to the column it reads from.
    print(f"{metric_name}: {best_model[metric_name]:.5f}")




===== KNN Classifier Results (Sorted by Accuracy) =====
     Model  Accuracy  Precision (Class 1)  Recall (Class 1)  F1 Score (Class 1)  Specificity
KNN (k=25)   0.84008              0.61159           0.35214             0.44694      0.94974
KNN (k=19)   0.83714              0.59118           0.36450             0.45096      0.94335
KNN (k=15)   0.83600              0.58280           0.37397             0.45559      0.93984
KNN (k=11)   0.83389              0.56978           0.38674             0.46075      0.93438
 KNN (k=9)   0.83170              0.55786           0.39909             0.46531      0.92892
 KNN (k=7)   0.82777              0.54064           0.40815             0.46515      0.92207
 KNN (k=5)   0.82013              0.51228           0.41227             0.45687      0.91179
 KNN (k=3)   0.81061              0.48211           0.43287             0.45616      0.89550
 KNN (k=2)   0.78703              0.42820           0.47900             0.45218      0.85626
 KNN (k=1)   