In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

data = pd.read_csv('diabetes.csv')

X = data.iloc[:, :-1]
y = data.iloc[:, -1]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

models = {
    'Logistic Regression': LogisticRegression(),
    'SVM': SVC(),
    'k-Means': KMeans(n_clusters=2)
}

kf = KFold(n_splits=4, shuffle=True, random_state=1)

for name, model in models.items():
    if name == 'k-Means':
        model.fit(X_scaled)
        y_pred = model.predict(X_scaled)
        y_pred = (y_pred == y_pred.mean()).astype(int)
        accuracy = accuracy_score(y, y_pred)
    else:
        scores = cross_val_score(model, X_scaled, y, cv=kf, scoring='accuracy')
        accuracy = scores.mean()
    
    print(f'{name} Accuracy: {accuracy:.4f}')


Logistic Regression Accuracy: 0.7695
SVM Accuracy: 0.7747
k-Means Accuracy: 0.6510
