In [1]:
import zipfile

# Location of the uploaded archive
zip_file_path = '/content/Liver Patient Dataset (LPD)_train.csv.zip'

# Unpack every member into /content/; the `with` block closes the archive
# as soon as extraction finishes
with zipfile.ZipFile(zip_file_path) as zip_ref:
    zip_ref.extractall(path='/content/')

print("File extracted!")


File extracted!


In [2]:
import pandas as pd

# Location of the extracted CSV file
dataset_path = '/content/Liver Patient Dataset (LPD)_train.csv'

# Decode the file as ISO-8859-1 (Latin-1) instead of pandas' default UTF-8
df = pd.read_csv(filepath_or_buffer=dataset_path, encoding='ISO-8859-1')

# Preview the first five rows
df.head()


Unnamed: 0,Age of the patient,Gender of the patient,Total Bilirubin,Direct Bilirubin,Alkphos Alkaline Phosphotase,Sgpt Alamine Aminotransferase,Sgot Aspartate Aminotransferase,Total Protiens,ALB Albumin,A/G Ratio Albumin and Globulin Ratio,Result
0,65.0,Female,0.7,0.1,187.0,16.0,18.0,6.8,3.3,0.9,1
1,62.0,Male,10.9,5.5,699.0,64.0,100.0,7.5,3.2,0.74,1
2,62.0,Male,7.3,4.1,490.0,60.0,68.0,7.0,3.3,0.89,1
3,58.0,Male,1.0,0.4,182.0,14.0,20.0,6.8,3.4,1.0,1
4,72.0,Male,3.9,2.0,195.0,27.0,59.0,7.3,2.4,0.4,1


In [3]:
# Summarise each column's dtype and non-null count; columns whose count is
# below the number of rows contain missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30691 entries, 0 to 30690
Data columns (total 11 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Age of the patient                    30689 non-null  float64
 1   Gender of the patient                 29789 non-null  object 
 2   Total Bilirubin                       30043 non-null  float64
 3   Direct Bilirubin                      30130 non-null  float64
 4    Alkphos Alkaline Phosphotase         29895 non-null  float64
 5    Sgpt Alamine Aminotransferase        30153 non-null  float64
 6   Sgot Aspartate Aminotransferase       30229 non-null  float64
 7   Total Protiens                        30228 non-null  float64
 8    ALB Albumin                          30197 non-null  float64
 9   A/G Ratio Albumin and Globulin Ratio  30132 non-null  float64
 10  Result                                30691 non-null  int64  
dtypes: float64(9), int64(1), object(1)

In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Work on a copy: `df` is the raw frame loaded earlier and must stay untouched,
# so that re-running this cell always starts from the same input.
data = df.copy()

gender_col = 'Gender of the patient'
target_col = 'Result'
numeric_cols = data.select_dtypes(include=['float64']).columns

# Choose the 80/20 train/test partition *before* learning any imputation
# statistics. Splitting row positions with the same test_size/random_state
# selects the same rows as splitting the feature frame itself would.
train_pos, test_pos = train_test_split(
    list(range(len(data))), test_size=0.2, random_state=42
)
train_rows = data.iloc[train_pos]

# Handling missing values
# Numeric columns: fill gaps with the median of the training rows only, so no
# information from the test rows leaks into preprocessing.
imputer = SimpleImputer(strategy='median')
imputer.fit(train_rows[numeric_cols])
data_imputed = pd.DataFrame(
    imputer.transform(data[numeric_cols]),
    columns=numeric_cols,
    index=data.index,  # keep the original index so the column assignments below align
)

# Gender: fill gaps with the most frequent training value, then encode the
# labels as integers. LabelEncoder numbers labels in sorted order, so this is
# Female -> 0, Male -> 1 provided those are the only two values present
# (TODO confirm with `label_encoder.classes_`).
gender_mode = train_rows[gender_col].mode()[0]
label_encoder = LabelEncoder()
data[gender_col] = label_encoder.fit_transform(data[gender_col].fillna(gender_mode))

# Now combine the imputed numeric data with the rest of the data.
# Gender is appended last, so the model's feature order is: the float64
# columns in file order, followed by gender. Anything that builds inputs for
# the trained model must use this same order.
data_imputed[gender_col] = data[gender_col]
data_imputed[target_col] = data[target_col]  # target column

# Split the data into features and target
X = data_imputed.drop(target_col, axis=1)
y = data_imputed[target_col]

# Materialise the training and testing sets from the partition chosen above
X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]
assert len(X_train) + len(X_test) == len(X)

# Initialize the RandomForestClassifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Evaluate the model
# NOTE(review): a near-perfect score here may mean duplicated or near-duplicate
# rows are shared between the train and test sets -- TODO check
# `data.duplicated().sum()` before trusting the number.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.9975566053103111
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      4429
           2       1.00      0.99      1.00      1710

    accuracy                           1.00      6139
   macro avg       1.00      1.00      1.00      6139
weighted avg       1.00      1.00      1.00      6139



In [6]:
import joblib

# Persist the fitted classifier to disk
joblib.dump(value=rf_classifier, filename='liver_disease_model.pkl')

# Read the saved artefact back in
loaded_model = joblib.load(filename='liver_disease_model.pkl')

# Score the reloaded model on the held-out rows; the accuracy should match
# the value obtained before saving
y_pred_loaded = loaded_model.predict(X_test)
print(f"Accuracy using loaded model: {accuracy_score(y_test, y_pred_loaded)}")


Accuracy using loaded model: 0.9975566053103111


In [7]:
import joblib
import numpy as np

# Load the saved model from the file written by joblib.dump earlier in the
# notebook; predict_liver_disease() below reads this module-level `model`
model = joblib.load('liver_disease_model.pkl')

# Function to take user input and make predictions
def predict_liver_disease():
    """Prompt for one patient's measurements and print the model's prediction.

    Reads ten values from standard input, arranges them in the column order
    the classifier was trained on, and passes them to the module-level
    ``model``. The outcome is printed; nothing is returned.

    A non-numeric measurement or an unrecognised gender prints an error
    message instead of a prediction.
    """
    # Accepted spellings (after lower-casing) and their integer codes.
    # Assumes the training encoding was Female=0, Male=1 -- TODO confirm
    # against the encoder used at training time.
    gender_codes = {"male": 1, "m": 1, "female": 0, "f": 0}

    print("Enter the following details for prediction:")

    try:
        age = float(input("Age of the patient: "))
        gender = input("Gender (Male/Female): ").strip().lower()
        if gender not in gender_codes:
            # Reject unknown text up front instead of silently scoring it
            # as Female, which is what a bare `else 0` would do.
            print("Invalid input! Gender must be Male or Female.")
            return
        total_bilirubin = float(input("Total Bilirubin: "))
        direct_bilirubin = float(input("Direct Bilirubin: "))
        alkphos = float(input("Alkphos Alkaline Phosphotase: "))
        sgpt = float(input("Sgpt Alamine Aminotransferase: "))
        sgot = float(input("Sgot Aspartate Aminotransferase: "))
        total_proteins = float(input("Total Proteins: "))
        albumin = float(input("ALB Albumin: "))
        ag_ratio = float(input("A/G Ratio Albumin and Globulin Ratio: "))

        gender_encoded = gender_codes[gender]

        # Feature order must match training: the training cell builds X from
        # the nine float columns (age first, A/G ratio last) and only then
        # appends the encoded gender column, so gender goes LAST here.
        input_data = np.array([[
            age,
            total_bilirubin,
            direct_bilirubin,
            alkphos,
            sgpt,
            sgot,
            total_proteins,
            albumin,
            ag_ratio,
            gender_encoded,
        ]])

        # Predict
        prediction = model.predict(input_data)

        # Print the result.
        # Assumes label 1 marks liver-disease patients (the training target
        # takes the values 1 and 2) -- TODO confirm against the dataset docs.
        print("\nPrediction:", "Liver Disease Detected" if prediction[0] == 1 else "No Liver Disease")

    except ValueError:
        print("Invalid input! Please enter numeric values where required.")

# Run the function to take input and predict; execution waits at each
# input() prompt until a value is entered
predict_liver_disease()


Enter the following details for prediction:
Age of the patient: 34
Gender (Male/Female): Male
Total Bilirubin: 22
Direct Bilirubin: 2
Alkphos Alkaline Phosphotase: 1
Sgpt Alamine Aminotransferase: 2
Sgot Aspartate Aminotransferase: 1
Total Proteins: 20
ALB Albumin: 1
A/G Ratio Albumin and Globulin Ratio: 1

Prediction: Liver Disease Detected


