In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import recall_score, f1_score  # Added these metrics

df = pd.read_csv('diabetes.csv')
X = df.drop('Outcome', axis=1)
y = df['Outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Search Grid - Hyperparameters")
slp_params = {
    'max_iter': [1000, 2000],                      #max n of iteration
    'tol': [1e-3, 1e-4],                          #tolerance
    'alpha': [0.0001, 0.001, 0.01],               #Regularization term parameter
    'eta0': [0.0001],                              #learnng rate
    'penalty': ['l2', 'l1', 'elasticnet'] ,      #regulariztion        
    'shuffle': [False],                           #shuffle training data in each iteration
    'early_stopping': [True, False],              #Whether to stop training when validation score isn't improving
    'random_state': [42],
    'class_weight': [{0:1, 1:2}]                         
}

slp = Perceptron() 
slp_grid = GridSearchCV(slp, slp_params, cv=5, scoring='accuracy', n_jobs=-1)
slp_grid.fit(X_train_scaled, y_train)

print("\nBest parameters for Single Layer Perceptron:")
print(slp_grid.best_params_)
print("\nBest cross-validation score:", slp_grid.best_score_)
slp_pred = slp_grid.predict(X_test_scaled)



print("\nConfusion Matrix:")
print(confusion_matrix(y_test, slp_pred))
print("\nClassification Report:")
print(classification_report(y_test, slp_pred))

Search Grid - Hyperparameters

Best parameters for Single Layer Perceptron:
{'alpha': 0.01, 'class_weight': {0: 1, 1: 2}, 'early_stopping': False, 'eta0': 0.0001, 'max_iter': 1000, 'penalty': 'elasticnet', 'random_state': 42, 'shuffle': False, 'tol': 0.001}

Best cross-validation score: 0.6855924296947887

Confusion Matrix:
[[63 36]
 [22 33]]

Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.64      0.68        99
           1       0.48      0.60      0.53        55

    accuracy                           0.62       154
   macro avg       0.61      0.62      0.61       154
weighted avg       0.65      0.62      0.63       154



In [6]:
df['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64

In [None]:
# Logistic Regression Model
# print("\nSearch Grid - Logistic Regression Hyperparameters")
# lr_params = {
#     'C': [0.001],          # Inverse of regularization strength    'C': [0.001, 0.01, 0.1, 1, 10, 100]
#     'penalty': ['l1', 'l2'],                       # Regularization type
#     'solver': ['liblinear'],                       # Algorithm to use
#     'max_iter': [2000],                            # Maximum iterations
#     'class_weight': ['balanced', {0:1, 1:2}],      # Class weights
#     'random_state': [42]
# }

#lr = LogisticRegression()


lr = LogisticRegression(penalty='l2', solver='liblinear', max_iter= 2000, C = 10,
                    class_weight= {0:1, 1:2},
                    random_state=42)

lr.fit(X_train_scaled, y_train)

lr_pred = lr.predict(X_test_scaled)

print("confusion matrix:")
print(confusion_matrix(y_test, lr_pred))
print("\nclassification report")
print(classification_report(y_test, lr_pred))

confusion matrix:
[[66 33]
 [15 40]]

classification report
              precision    recall  f1-score   support

           0       0.81      0.67      0.73        99
           1       0.55      0.73      0.62        55

    accuracy                           0.69       154
   macro avg       0.68      0.70      0.68       154
weighted avg       0.72      0.69      0.69       154

