# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('/kaggle/input/credit/Credit.csv')

# Display the dataset before standardization
print("Dataset before standardization:")
print(df.head())

target_column = 'Class'

# Prepare features (X) and target (y).
# The target is separated first so it can never be picked up by the scaler,
# even if it happens to be stored as a number.
X = df.drop(columns=[target_column])  # Features
y = df[target_column]                  # Target

# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only, otherwise information about the test rows leaks into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Explicit copies so the column assignments below write to independent frames.
X_train = X_train.copy()
X_test = X_test.copy()

# Select only the numeric feature columns for standardization
# (the target column is already excluded because we look at X, not df).
numeric_columns = X.select_dtypes(include=['float64', 'int64']).columns

# Initialize the StandardScaler
scaler = StandardScaler()

# Guard against a dataset without numeric features: there is nothing to scale.
if len(numeric_columns) > 0:
    # Fit on the training rows only, then apply the same transform to the test rows.
    X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
    X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

    # Keep df and X standardized as well (using the training statistics) so the
    # display below and any later use of these names still see scaled features.
    df[numeric_columns] = scaler.transform(df[numeric_columns])
    X = df.drop(columns=[target_column])

# Display the dataset after standardization
print("\nDataset after standardization:")
print(df.head())

k_values = [1, 3, 5, 10]

# Label treated as the positive class for TPR/FNR and for the ROC curve.
# Every "positive" lookup below is derived from this single value instead of
# relying on the positive class happening to sit at index 1.
positive_label = 'Good'

# Train and test the KNN model for each K value
for k in k_values:
    # Initialize and train the KNN classifier
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)

    # Predict on the test set
    y_pred = knn.predict(X_test)

    # The order of knn.classes_ defines the column order of predict_proba;
    # using the same order for the confusion matrix keeps all indices aligned.
    class_labels = list(knn.classes_)
    if positive_label not in class_labels:
        raise ValueError(
            f"Positive label {positive_label!r} not found among trained classes {class_labels}"
        )
    pos_idx = class_labels.index(positive_label)

    # Generate confusion matrix with an explicit label order so its shape does
    # not depend on which classes happen to appear in the test split.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

    # Calculate True Positive Rate (TPR) and False Negative Rate (FNR).
    # Row pos_idx holds every test sample whose true class is the positive one.
    actual_positives = conf_matrix[pos_idx].sum()
    TP = conf_matrix[pos_idx, pos_idx]  # True Positives
    FN = actual_positives - TP          # False Negatives
    TPR = TP / actual_positives if actual_positives > 0 else 0  # True Positive Rate
    FNR = FN / actual_positives if actual_positives > 0 else 0  # False Negative Rate

    # Print confusion matrix and classification report
    print(f"\nConfusion Matrix for K={k}:")
    print(conf_matrix)

    print(f"\nClassification Report for K={k}:")
    print(classification_report(y_test, y_pred))

    # Include TPR and FNR in the output
    print(f"True Positive Rate for K={k}: {TPR:.4f}")
    print(f"False Negative Rate for K={k}: {FNR:.4f}")

    # ROC Curve analysis: score = predicted probability of the positive class,
    # ground truth = 1 where the true label is the positive class, else 0.
    y_prob = knn.predict_proba(X_test)[:, pos_idx]
    y_true_binary = (y_test == positive_label).astype(int)
    fpr, tpr, thresholds = roc_curve(y_true_binary, y_prob)
    roc_auc = auc(fpr, tpr)

    # Plot ROC Curve (one figure per K) with the chance diagonal for reference
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC Curve for K={k} (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve for K={k}')
    plt.legend(loc="lower right")
    plt.show()
