In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import joblib

# --- Load data ---------------------------------------------------------------
df = pd.read_csv('liver_data.csv')
print("✅ Dataset loaded successfully.")

# --- Handle missing values ---------------------------------------------------
# Report per-column null counts, then drop every row that has at least one.
# Reassignment (rather than inplace=True) yields the same frame and avoids
# mutating an object another reference may still point to.
print("\n🔍 Missing values:\n", df.isnull().sum())
df = df.dropna()
print("✅ After dropping nulls, shape =", df.shape)

# --- Features / target -------------------------------------------------------
# 'Diagnosis' is the label column; every other column is used as a feature.
X = df.drop(columns='Diagnosis')
y = df['Diagnosis']

# --- Train/test split --------------------------------------------------------
# Split BEFORE scaling: fitting the scaler on the full matrix would let the
# held-out rows influence the mean/std applied to the training data (leakage).
# test_size and random_state are unchanged, so the row partition is the same
# as when the already-scaled matrix was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Scaling -----------------------------------------------------------------
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole feature matrix under its original name, now scaled with the
# train-only statistics. NOTE(review): kept in case a later cell reads
# X_scaled -- drop this line if nothing does.
X_scaled = scaler.transform(X)

# --- Model -------------------------------------------------------------------
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42  # fixed seed so repeated runs build the same forest
)
model.fit(X_train, y_train)
print("✅ Model training complete.")

# --- Evaluation on the held-out 20% ------------------------------------------
y_pred = model.predict(X_test)

print("\n🎯 Model Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# --- Persist artifacts -------------------------------------------------------
# The scaler is saved next to the model: inputs must go through the same
# fitted transform before being passed to model.predict.
joblib.dump(model, 'liver_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
print("\n✅ Model and scaler saved as liver_model.pkl and scaler.pkl")


✅ Dataset loaded successfully.

🔍 Missing values:
 Age                   0
Gender                0
BMI                   0
AlcoholConsumption    0
Smoking               0
GeneticRisk           0
PhysicalActivity      0
Diabetes              0
Hypertension          0
LiverFunctionTest     0
Diagnosis             0
dtype: int64
✅ After dropping nulls, shape = (1700, 11)
✅ Model training complete.

🎯 Model Performance:
Accuracy: 0.9029411764705882

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.86      0.89       162
           1       0.88      0.94      0.91       178

    accuracy                           0.90       340
   macro avg       0.91      0.90      0.90       340
weighted avg       0.90      0.90      0.90       340


Confusion Matrix:
 [[139  23]
 [ 10 168]]

✅ Model and scaler saved as liver_model.pkl and scaler.pkl


In [6]:
import pandas as pd

# Re-read the CSV into `df` and print a header line followed by the list of
# column names (sep="\n" puts the list on its own line).
df = pd.read_csv('liver_data.csv')
print("Columns in your dataset:", df.columns.tolist(), sep="\n")


Columns in your dataset:
['Age', 'Gender', 'BMI', 'AlcoholConsumption', 'Smoking', 'GeneticRisk', 'PhysicalActivity', 'Diabetes', 'Hypertension', 'LiverFunctionTest', 'Diagnosis']
