In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.datasets import load_diabetes
from tabulate import tabulate

### Load Dataset

In [9]:
# Fetch the bundled diabetes dataset, then expose its feature matrix as X
# and its target values as y for the cells below.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target


# Binarize the target variable for logistic regression and KNN classification


In [11]:
# Cut-off applied to the continuous target: values strictly above it become
# class 1, everything else becomes class 0. Kept as a named constant so the
# choice is visible and easy to tune.
# NOTE(review): 140 looks like it was picked to roughly balance the two
# classes — confirm the intended rationale.
TARGET_THRESHOLD = 140
y_binary = (y > TARGET_THRESHOLD).astype(int)

# Split the dataset into training and test sets for regression


In [12]:
# Hold out 20% of the samples for evaluating the regression models; the fixed
# seed keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [13]:
# Train/test split for the classification models: same features, but paired
# with the binarized target. Uses a 20% hold-out and a fixed seed.
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Standardize the regression features: learn the scaling statistics from the
# training rows only, then apply those same statistics to the held-out rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The classification split gets its own scaler instead of re-fitting `scaler`.
# Re-fitting would silently overwrite the regression scaler's fitted state, so
# any later use of `scaler` would depend on which line happened to run last.
scaler_bin = StandardScaler()
X_train_bin_scaled = scaler_bin.fit_transform(X_train_bin)
X_test_bin_scaled = scaler_bin.transform(X_test_bin)

In [15]:
# Fit a linear regression model on the standardized training features, then
# predict the continuous target for the held-out rows.
lin_reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred_lin_reg = lin_reg.predict(X_test_scaled)

In [16]:
# Fit a logistic regression classifier (default settings) on the standardized
# classification training set, then predict labels for the held-out rows.
log_reg = LogisticRegression().fit(X_train_bin_scaled, y_train_bin)
y_pred_log_reg = log_reg.predict(X_test_bin_scaled)

In [17]:
# Fit a 5-nearest-neighbours regressor on the standardized training features,
# then predict the continuous target for the held-out rows.
knn_reg = KNeighborsRegressor(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn_reg = knn_reg.predict(X_test_scaled)

In [21]:
# Fit a 5-nearest-neighbours classifier on the standardized classification
# training set, then predict labels for the held-out rows.
knn_clf = KNeighborsClassifier(n_neighbors=5).fit(X_train_bin_scaled, y_train_bin)
y_pred_knn_clf = knn_clf.predict(X_test_bin_scaled)

In [22]:
# Score the linear regression predictions against the held-out targets and
# report both regression metrics.
r2_lin_reg = r2_score(y_true=y_test, y_pred=y_pred_lin_reg)
mse_lin_reg = mean_squared_error(y_true=y_test, y_pred=y_pred_lin_reg)
print(f"Linear Regression - MSE: {mse_lin_reg}, R2: {r2_lin_reg}")

Linear Regression - MSE: 2900.193628493483, R2: 0.45260276297191915


In [23]:
# Score the logistic regression predictions against the held-out binary
# labels with the four classification metrics, then report them on one line.
accuracy_log_reg = accuracy_score(y_true=y_test_bin, y_pred=y_pred_log_reg)
precision_log_reg = precision_score(y_true=y_test_bin, y_pred=y_pred_log_reg)
recall_log_reg = recall_score(y_true=y_test_bin, y_pred=y_pred_log_reg)
f1_log_reg = f1_score(y_true=y_test_bin, y_pred=y_pred_log_reg)
print(
    f"Logistic Regression - Accuracy: {accuracy_log_reg}, "
    f"Precision: {precision_log_reg}, "
    f"Recall: {recall_log_reg}, "
    f"F1 Score: {f1_log_reg}"
)

Logistic Regression - Accuracy: 0.7303370786516854, Precision: 0.6904761904761905, Recall: 0.725, F1 Score: 0.7073170731707317


In [24]:
# Score the KNN regression predictions against the held-out targets and
# report both regression metrics.
r2_knn_reg = r2_score(y_true=y_test, y_pred=y_pred_knn_reg)
mse_knn_reg = mean_squared_error(y_true=y_test, y_pred=y_pred_knn_reg)
print(f"KNN Regression - MSE: {mse_knn_reg}, R2: {r2_knn_reg}")

KNN Regression - MSE: 3047.449887640449, R2: 0.42480887066066253


In [25]:
# Score the KNN classifier predictions against the held-out binary labels
# with the four classification metrics, then report them on one line.
accuracy_knn_clf = accuracy_score(y_true=y_test_bin, y_pred=y_pred_knn_clf)
precision_knn_clf = precision_score(y_true=y_test_bin, y_pred=y_pred_knn_clf)
recall_knn_clf = recall_score(y_true=y_test_bin, y_pred=y_pred_knn_clf)
f1_knn_clf = f1_score(y_true=y_test_bin, y_pred=y_pred_knn_clf)
print(
    f"KNN Classification - Accuracy: {accuracy_knn_clf}, "
    f"Precision: {precision_knn_clf}, "
    f"Recall: {recall_knn_clf}, "
    f"F1 Score: {f1_knn_clf}"
)

KNN Classification - Accuracy: 0.6853932584269663, Precision: 0.65, Recall: 0.65, F1 Score: 0.65


In [27]:
# One row per model for the summary table. Each row is the model name, two
# regression metrics (MSE, R2), then four classification metrics (accuracy,
# precision, recall, F1). Metrics that do not apply to a model are None.
_no_regression_metrics = [None, None]
_no_classification_metrics = [None, None, None, None]

table_data = [
    ['Linear Regression', mse_lin_reg, r2_lin_reg, *_no_classification_metrics],
    ['Logistic Regression', *_no_regression_metrics,
     accuracy_log_reg, precision_log_reg, recall_log_reg, f1_log_reg],
    ['KNN Regression', mse_knn_reg, r2_knn_reg, *_no_classification_metrics],
    ['KNN Classification', *_no_regression_metrics,
     accuracy_knn_clf, precision_knn_clf, recall_knn_clf, f1_knn_clf],
]

In [28]:
# Column titles for the summary table: the model name, the two regression
# metrics, then the four classification metrics.
headers = [
    'Model',
    'MSE', 'R2',
    'Accuracy', 'Precision', 'Recall', 'F1 Score',
]


In [31]:
# Render the per-model metrics as a grid-style text table and print it.
summary_table = tabulate(table_data, headers=headers, tablefmt='grid')
print(summary_table)