<a href="https://colab.research.google.com/github/zeeshan-ahmed-ai-tech/Machine-Learning/blob/main/diabetes_ml_ann.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================
# 1. IMPORT LIBRARIES
# ===============================
import os

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed


In [None]:
# ===============================
# 2. LOAD DATASET
# ===============================
# Location of the dataset. The default is the original Google Drive path used
# in Colab; set the DIABETES_CSV environment variable to point somewhere else
# when running the notebook outside that environment.
# NOTE(review): no cell in view mounts Google Drive, so the default path
# presumably relies on the drive having been mounted beforehand - confirm.
DATA_PATH = os.environ.get("DIABETES_CSV", "/content/drive/MyDrive/diabetes.csv")

df = pd.read_csv(DATA_PATH)

# Display first 5 rows
df.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
# ===============================
# 3. DATA PREPROCESSING
# ===============================

# Columns in which a literal 0 is treated as an invalid reading.
cols_with_zero = [
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
]

# Turn those zeros into NaN so they can be imputed below.
df[cols_with_zero] = df[cols_with_zero].replace({0: np.nan})

# Impute every NaN with the mean of its own column.
# NOTE(review): the means are taken over the whole dataset, before the
# train/test split further down, so test rows influence the imputed values.
df = df.fillna(df.mean())


In [None]:
# ===============================
# 4. SPLIT FEATURES & TARGET
# ===============================
# 'Outcome' is the label; every other column of df is used as a feature.
# NOTE(review): the df.head() output lists an 'Unnamed: 0' column. If that is
# a real CSV column (a saved row index) rather than a display artifact, it is
# included in X as a feature here - confirm.
y = df['Outcome']
X = df.drop(columns=['Outcome'])


In [None]:
# ===============================
# 5. TRAIN-TEST SPLIT
# ===============================
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
TEST_SIZE = 0.2
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)


In [None]:
# ===============================
# 6. FEATURE SCALING
# ===============================
# Fit the scaler on the training features only, then apply that same fitted
# scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# ===============================
# 7. LOGISTIC REGRESSION MODEL
# ===============================
# Baseline classifier: logistic regression with default settings, trained on
# the scaled training features. fit() returns the estimator itself.
ml_model = LogisticRegression().fit(X_train_scaled, y_train)

# Keep the fitted estimator as the cell's last expression so it is displayed.
ml_model


In [None]:
# ===============================
# 8. EVALUATION
# ===============================
# Predict labels for the held-out test set.
y_pred_ml = ml_model.predict(X_test_scaled)

# Compute the three summaries first, then print them in the same order.
ml_accuracy = accuracy_score(y_test, y_pred_ml)
ml_confusion = confusion_matrix(y_test, y_pred_ml)
ml_report = classification_report(y_test, y_pred_ml)

print("ML Model Accuracy:", ml_accuracy)
print("\nConfusion Matrix:\n", ml_confusion)
print("\nClassification Report:\n", ml_report)


ML Model Accuracy: 0.7532467532467533

Confusion Matrix:
 [[82 17]
 [21 34]]

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.83      0.81        99
           1       0.67      0.62      0.64        55

    accuracy                           0.75       154
   macro avg       0.73      0.72      0.73       154
weighted avg       0.75      0.75      0.75       154



In [None]:
# ===============================
# 9. IMPORT ANN LIBRARIES
# ===============================
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [None]:
# ===============================
# 10. BUILD ANN MODEL
# ===============================
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so the ANN's initialisation (and the training that follows in a
# top-to-bottom run) is repeatable. 42 matches the seed used for the data split.
set_random_seed(42)

ann_model = Sequential()

# Input: one value per scaled feature column. An explicit Input object is used
# because Keras warns against passing `input_shape` to a layer of a Sequential
# model and recommends an Input as the first layer instead.
ann_model.add(Input(shape=(X_train_scaled.shape[1],)))

# Hidden Layer 1
ann_model.add(Dense(16, activation='relu'))

# Hidden Layer 2
ann_model.add(Dense(8, activation='relu'))

# Output Layer: a single sigmoid unit, i.e. one value in (0, 1) per sample
ann_model.add(Dense(1, activation='sigmoid'))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# ===============================
# 11. COMPILE ANN
# ===============================
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output unit), and accuracy tracked as the reported metric.
ann_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# ===============================
# 12. TRAIN ANN
# ===============================
# Training hyper-parameters, named so they are easy to find and tune.
EPOCHS = 50
BATCH_SIZE = 16
VALIDATION_SPLIT = 0.1  # fraction of the training data held out for validation

# `history` keeps the object returned by fit() for later inspection.
history = ann_model.fit(
    X_train_scaled, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    verbose=1,  # print a progress line for every epoch
)


Epoch 1/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.3591 - loss: 0.8637 - val_accuracy: 0.4355 - val_loss: 0.7325
Epoch 2/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4178 - loss: 0.7353 - val_accuracy: 0.5323 - val_loss: 0.6903
Epoch 3/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5922 - loss: 0.6908 - val_accuracy: 0.6129 - val_loss: 0.6724
Epoch 4/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6594 - loss: 0.6561 - val_accuracy: 0.6935 - val_loss: 0.6589
Epoch 5/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6970 - loss: 0.6289 - val_accuracy: 0.6774 - val_loss: 0.6438
Epoch 6/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7121 - loss: 0.6024 - val_accuracy: 0.6935 - val_loss: 0.6301
Epoch 7/50
[1m35/35[0m [32m━━━━━━━━━

In [None]:
# ===============================
# 13. ANN EVALUATION
# ===============================
# The network ends in a single sigmoid unit, so predict() yields one score per
# sample in a column. Threshold the scores at 0.5 to get 0/1 labels, then
# flatten to 1-D so the predictions have the same shape as y_test (and as the
# logistic-regression predictions) instead of an (n, 1) column.
ann_scores = ann_model.predict(X_test_scaled)
y_pred_ann = (ann_scores > 0.5).astype(int).ravel()

print("ANN Model Accuracy:", accuracy_score(y_test, y_pred_ann))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_ann))
print("\nClassification Report:\n", classification_report(y_test, y_pred_ann))


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
ANN Model Accuracy: 0.7727272727272727

Confusion Matrix:
 [[84 15]
 [20 35]]

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.85      0.83        99
           1       0.70      0.64      0.67        55

    accuracy                           0.77       154
   macro avg       0.75      0.74      0.75       154
weighted avg       0.77      0.77      0.77       154

