In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, classification_report
import os
import pickle


In [2]:
# Load the dataset
file_path = 'D:/Projects/LLM Models/mlmodel/Liver Disease/hepatitis_C.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Check for missing values
print(data.isnull().sum())


   Unnamed: 0  Age Sex   ALP   ALT   AST   CREA       Category
0           1   32   m  52.5   7.7  22.1  106.0  0=Blood Donor
1           2   32   m  70.3  18.0  24.7   74.0  0=Blood Donor
2           3   32   m  74.7  36.2  52.6   86.0  0=Blood Donor
3           4   32   m  52.0  30.6  22.6   80.0  0=Blood Donor
4           5   32   m  74.1  32.6  24.8   76.0  0=Blood Donor
Unnamed: 0     0
Age            0
Sex            0
ALP           18
ALT            1
AST            0
CREA           0
Category       0
dtype: int64


In [3]:
# Drop the 'Unnamed: 0' column
data = data.drop(columns=['Unnamed: 0'])

# Drop rows with missing values
data = data.dropna()

# Convert categorical variables
label_encoder = LabelEncoder()
data['Sex'] = label_encoder.fit_transform(data['Sex'])
data['Category'] = label_encoder.fit_transform(data['Category'])

In [4]:
print(data['Category'])

0      0
1      0
2      0
3      0
4      0
      ..
608    4
609    4
610    4
611    4
612    4
Name: Category, Length: 596, dtype: int32


In [5]:
# Check for missing values
print(data.isnull().sum())

Age         0
Sex         0
ALP         0
ALT         0
AST         0
CREA        0
Category    0
dtype: int64


In [6]:
# Features (X) and target (y)
X = data.drop(columns=['Category'])
y = data['Category']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
# Define base models
base_models = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42))
]

# Define the meta model
meta_model = GradientBoostingClassifier(random_state=42)

# Create the Stacking ensemble model
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Train the model
stacking_model.fit(X_train, y_train)
stacking_model.fit(X_train, y_train)
folder_path = 'mlmodel/Liver Disease/saved_models'
os.makedirs(folder_path, exist_ok=True)  # Create the folder if it doesn't exist

# Define the file name and path for the pickle file
model_file_path = os.path.join(folder_path, 'hepatitis_stacking_model.pkl')

# Save the trained stacking model as a pickle file
with open(model_file_path, 'wb') as model_file:
    pickle.dump(stacking_model, model_file)

print(f'Model saved at: {model_file_path}')




Model saved at: mlmodel/Liver Disease/saved_models\hepatitis_stacking_model.pkl


In [19]:
# Make predictions on the test set
y_pred = stacking_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", report)


Accuracy: 91.67%
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98       104
           1       0.50      0.33      0.40         3
           2       0.50      0.20      0.29         5
           3       0.00      0.00      0.00         2
           4       0.67      0.67      0.67         6

    accuracy                           0.92       120
   macro avg       0.53      0.44      0.47       120
weighted avg       0.90      0.92      0.91       120



In [20]:
import numpy as np

# Define the prediction function
def predict_hepatitis_disease(input_data):
    """
    Predicts whether a person has Hepatitis, Fibrosis, Cirrhosis, or No Disease.
    
    Args:
    input_data: A list containing the following features in order:
                [Age, Sex (0 for Female, 1 for Male), ALP, ALT, AST, CREA]
    
    Returns:
    A string indicating the predicted disease status.
    """
    # Convert input data to a numpy array and reshape for prediction
    input_data = np.array(input_data).reshape(1, -1)
    
    # Predict using the trained stacking model
    prediction = stacking_model.predict(input_data)
    
    # Map the prediction to the corresponding disease status
    disease_map = {
        0: 'No Disease',
        1: 'Hepatitis',
        2: 'Fibrosis',
        3: 'Cirrhosis',
        4: 'Blood Donor'
    }
    
    # Return the predicted disease status
    return disease_map.get(prediction[0], 'Unknown')

# Example usage:
# input_data = [Age, Sex (0 for Female, 1 for Male), ALP, ALT, AST, CREA]
example_input = [32, 1, 74.1, 32.6, 24.8, 76.0]
prediction = predict_hepatitis_disease(example_input)
print(prediction)


No Disease




In [10]:
import numpy as np

# Define the prediction function
with open(model_file_path, 'rb') as model_file:
    loaded_stacking_model = pickle.load(model_file)
def predict_hepatitis_disease(input_data):
    """
    Predicts whether a person has Hepatitis, Fibrosis, Cirrhosis, or No Disease.
    
    Args:
    input_data: A list containing the following features in order:
                [Age, Sex (0 for Female, 1 for Male), ALP, ALT, AST, CREA]
    
    Returns:
    A string indicating the predicted disease status.
    """
    # Convert input data to a numpy array and reshape for prediction
    input_data = np.array(input_data).reshape(1, -1)
    
    # Predict using the trained stacking model
    prediction = loaded_stacking_model.predict(input_data)
    
    # Map the prediction to the corresponding disease status
    disease_map = {
        0: 'No Disease',
        1: 'Hepatitis',
        2: 'Fibrosis',
        3: 'Cirrhosis',
        4: 'Blood Donor'
    }
    
    # Return the predicted disease status
    return disease_map.get(prediction[0], 'Unknown')

# Example usage:
# input_data = [Age, Sex (0 for Female, 1 for Male), ALP, ALT, AST, CREA]
example_input = [32, 1, 150, 21, 11, 0.90]
prediction = predict_hepatitis_disease(example_input)
print(prediction)


Hepatitis


