In [3]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load dataset
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
DATA_PATH = r"C:\Users\visdas\Downloads\indian_liver_patient.csv"
df = pd.read_csv(DATA_PATH)

# Preprocess: encode Gender numerically and recode the label value 2 as 0.
# Gender values other than 'Male'/'Female' map to NaN and are imputed below.
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})
df['Dataset'] = df['Dataset'].replace(2, 0)  # Target: 1 = Liver, 0 = Healthy

# A row without a label cannot be used for supervised training; drop it rather
# than mean-imputing the target (which would create a fractional pseudo-class).
df = df.dropna(subset=['Dataset'])

# Features & target
X = df.drop(columns=['Dataset'])
y = df['Dataset']

# Train-test split (stratified so both splits keep the same class ratio)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Handle missing values
# Imputation means are computed from the training split only and then applied
# to both splits. Computing them on the full dataset before splitting would
# leak test-set statistics into training and bias the evaluation.
feature_means = X_train.mean()
X_train = X_train.fillna(feature_means)
X_test = X_test.fillna(feature_means)

# Scale features (scaler is fit on training data only, then reused for test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
# NOTE(review): if recall on the minority class matters, consider
# class_weight='balanced' -- left unchanged here to keep the model config as-is.
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save model & scaler
# Both artifacts are needed at inference time: inputs must be scaled with the
# same fitted scaler before being passed to the model.
joblib.dump(model, 'liver_model.pkl')
joblib.dump(scaler, 'liver_scaler.pkl')
print("Model & Scaler saved!")

Accuracy: 0.7521367521367521
              precision    recall  f1-score   support

           0       0.63      0.35      0.45        34
           1       0.78      0.92      0.84        83

    accuracy                           0.75       117
   macro avg       0.70      0.63      0.65       117
weighted avg       0.73      0.75      0.73       117

Model & Scaler saved!
