In [59]:
import numpy as np
import pandas as pd

In [60]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [61]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [62]:
# Load the scikit-learn diabetes dataset and pull out the `data` and
# `target` attributes of the returned object.
data = load_diabetes()
x, y = data.data, data.target

In [63]:
# Binarise the target: 1 where the value is strictly above the median of y,
# 0 otherwise.
y_median = np.median(y)
y_binary = (y > y_median).astype(int)


In [64]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_binary,
    test_size=0.2,
    random_state=42,
)

In [65]:
# Fit the scaler on the training split only, then apply that same fitted
# scaler to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [66]:
# Model 1: k-nearest neighbours (KNN) with 5 neighbours.
# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(x_train_scaled, y_train)
knn_pred = knn.predict(x_test_scaled)



In [67]:
# Model 2: logistic regression using the liblinear solver.
# fit() returns the estimator itself, so construction and training are chained.
lr_liblinear = LogisticRegression(solver='liblinear').fit(x_train_scaled, y_train)
lr_liblinear_pred = lr_liblinear.predict(x_test_scaled)

In [68]:
# Model 3: logistic regression using the lbfgs solver, allowing up to
# 1000 iterations.
lr_lbfgs = LogisticRegression(solver='lbfgs', max_iter=1000).fit(
    x_train_scaled, y_train
)
lbfgs_pred = lr_lbfgs.predict(x_test_scaled)

In [69]:

def evaluate(name, y_true, y_pred):
    """Print accuracy, confusion matrix and classification report for a model.

    Args:
        name: Heading printed (after a blank line) above the metrics.
        y_true: Reference labels passed to each metric function.
        y_pred: Predicted labels passed to each metric function.

    Returns:
        None; the results are only written to standard output.
    """
    print(f"\n{name}")

    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)

    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", matrix)

    report = classification_report(y_true, y_pred)
    print("Classification Report:\n", report)

# Print the same set of metrics for each model's test-set predictions,
# in the order the models were trained.
model_predictions = (
    ("KNN", knn_pred),
    ("Logistic Regression (liblinear)", lr_liblinear_pred),
    ("Logistic Regression (lbfgs)", lbfgs_pred),
)
for model_name, predictions in model_predictions:
    evaluate(model_name, y_test, predictions)


KNN
Accuracy: 0.6853932584269663
Confusion Matrix:
 [[35 14]
 [14 26]]
Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.71      0.71        49
           1       0.65      0.65      0.65        40

    accuracy                           0.69        89
   macro avg       0.68      0.68      0.68        89
weighted avg       0.69      0.69      0.69        89


Logistic Regression (liblinear)
Accuracy: 0.7303370786516854
Confusion Matrix:
 [[36 13]
 [11 29]]
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.73      0.75        49
           1       0.69      0.72      0.71        40

    accuracy                           0.73        89
   macro avg       0.73      0.73      0.73        89
weighted avg       0.73      0.73      0.73        89


Logistic Regression (lbfgs)
Accuracy: 0.7303370786516854
Confusion Matrix:
 [[36 13]
 [11 29]]
Classification Report:
       