In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline   
from sklearn.metrics import accuracy_score

# Load the dataset
file_path = 'Cirrhosis_Data.xlsx'
data = pd.read_excel(file_path)

# Preprocessing the data
data = data.drop(columns=['S.NO'])
data['Gender'] = data['Gender'].map({'male': 0, 'female': 1})
data['Hepatitis_C_infection'] = data['Hepatitis_C_infection'].map({'negative': 0, 'positive': 1})
data['Predicted Value'] = data['Predicted Value(Out Come-Patient suffering from liver  cirrosis or not)'].map({'YES': 1, 'NO': 0})
data = data.drop(columns=['Predicted Value(Out Come-Patient suffering from liver  cirrosis or not)'])

# Replace non-numeric entries with correct values
data.replace({'o.4': 0.4}, inplace=True)
data = data.apply(pd.to_numeric, errors='coerce')

# Drop rows where the target variable is NaN
data = data.dropna(subset=['Predicted Value'])

# Separate the features and the target
X = data.drop(columns=['Predicted Value'])
y = data['Predicted Value']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline with an imputer and a random forest classifier
pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('model', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 1.00


  data.replace({'o.4': 0.4}, inplace=True)


In [12]:
# Example of new data for prediction
new_data = pd.DataFrame({
    'Age': [87],
    'Gender': [1],  # 0 for male, 1 for female
    'Hepatitis_C_infection': [1],  # 0 for negative, 1 for positive
    'Total Bilirubin(mg/dl)': [0.55],
    'Direct(mg/dl)': [0.27],
    'Indirect(mg/dl)': [0.28],
    'Albumin(g/dl)': [3.59],
    'Globulin(g/dl)': [2.37],
    'A/G Ratio': [1.51],
    'AL.Phosphatase(U/L)': [52.6],
    'SGOT/AST(U/L)': [21.5],
    'SGPT/ALT(U/L)': [12.3]
})

# Use the pipeline to predict the outcome
predicted_value = pipeline.predict(new_data)

# Print the predicted value
print(f"Predicted Value: {'YES' if predicted_value[0] == 1 else 'NO'}")


Predicted Value: YES
