In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the phishing dataset. latin1 can decode any byte sequence, so the read
# does not fail on stray non-UTF-8 characters in the file.
dataset = pd.read_csv("dataset_phishing.csv", encoding='latin1')

# Convert categorical (object-dtype) columns to integer codes.
# A separate encoder is fitted per column and kept in `label_encoders`, so the
# code -> original value mapping of every encoded column (including `status`,
# if it is textual) stays recoverable via `classes_` / `inverse_transform`.
# Refitting one shared encoder would keep only the last column's mapping.
label_encoders = {}
# `label_encoder` stays bound at module level; after the loop it refers to the
# encoder of the last encoded column (or an unfitted one if nothing was encoded).
label_encoder = LabelEncoder()
for col in dataset.columns:
    if dataset[col].dtype == 'object':
        label_encoder = LabelEncoder()
        dataset[col] = label_encoder.fit_transform(dataset[col])
        label_encoders[col] = label_encoder
# NOTE(review): every object column becomes an ordinal feature. If the CSV
# carries a free-text identifier column (e.g. the raw URL), its codes carry no
# real signal and it should probably be dropped instead - TODO confirm against
# the dataset schema.

# Separate the label column from the predictor columns
y = dataset['status']
X = dataset.drop(columns='status')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardise the features on plain NumPy arrays: the scaler is fitted on the
# training rows only and then applied unchanged to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.values)
X_test = scaler.transform(X_test.values)

# Build a k-nearest-neighbours classifier with scikit-learn's default settings
# and fit it on the scaled training data (fit() returns the estimator itself)
model = KNeighborsClassifier().fit(X_train, y_train)

# Predict a label for every held-out row
y_pred = model.predict(X_test)

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Calculate confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# The tn/fp/fn/tp unpacking below only makes sense for a binary problem; fail
# with an explicit message instead of an opaque unpacking error otherwise.
if cm.shape != (2, 2):
    raise ValueError(
        f"Expected a 2x2 confusion matrix for binary metrics, got shape {cm.shape}"
    )

# Extract metrics from confusion matrix. Labels are ordered ascending, so the
# larger of the two label values is treated as the positive class.
tn, fp, fn, tp = cm.ravel()
total = tp + tn + fp + fn

# Calculate accuracy
accuracy = _safe_ratio(tp + tn, total)
print("Accuracy:", accuracy)

# Calculate sensitivity (recall); 0.0 if the test set has no positive samples
sensitivity = _safe_ratio(tp, tp + fn)
print("Sensitivity (Recall):", sensitivity)

# Calculate specificity; 0.0 if the test set has no negative samples
specificity = _safe_ratio(tn, tn + fp)
print("Specificity:", specificity)

# Calculate error rate
error_rate = _safe_ratio(fp + fn, total)
print("Error Rate:", error_rate)

# Calculate precision; 0.0 if the model never predicted the positive class
precision = _safe_ratio(tp, tp + fp)
print("Precision:", precision)

# Calculate F1 score; 0.0 when both precision and recall are zero
f1_score = _safe_ratio(2 * (precision * sensitivity), precision + sensitivity)
print("F1 Score:", f1_score)

# Print classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))
