In [3]:
# --- Import Libraries ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import random
import warnings

warnings.filterwarnings('ignore')

# --- (a) Load Dataset ---
df = pd.read_csv("pima-indians-diabetes.csv", header=None)

# Add proper column names (if file has no header)
df.columns = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
    'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'
]

print("--- Data Head ---")
print(df.head())

# --- (b) Data Preprocessing ---
df.fillna(df.mean(numeric_only=True), inplace=True)

# --- (c) Feature & Target Split ---
X = df.drop(columns=['Outcome'])
y = df['Outcome']

# --- (d) Train-Test Split ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- (e) Train Decision Tree ---
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)

# --- (f) Evaluate Model ---
y_pred = dt_clf.predict(X_test)
print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# --- (g) Test for Random Sample ---
print("\n--- Random Sample Test ---")
random_index = random.randint(0, len(X_test) - 1)
sample = X_test.iloc[[random_index]]
actual = y_test.iloc[random_index]
predicted = dt_clf.predict(sample)[0]

print(f"Random Sample Index: {random_index}")
print(f"Sample Data:\n{sample}")
print(f"Actual Outcome: {actual}")
print(f"Predicted Outcome: {predicted}")

if predicted == 1:
    print("→ Model Prediction: Diabetic")
else:
    print("→ Model Prediction: Not Diabetic")


--- Data Head ---
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  

--- Model Evaluation ---
Accuracy: 0.75

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.76      0.79        99
           1       0.62      0.73      0.67        55

    accuracy                           0.75       154
  