In [2]:
!pip install catboost





[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: C:\Users\HP\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import os
import pickle

# Load the dataset
file_path = 'D:/Projects/LLM Models/mlmodel/Liver Disease/indian_liver_patient.csv'  # Update with your actual file path
df = pd.read_csv(file_path)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
df['Albumin_and_Globulin_Ratio'] = imputer.fit_transform(df[['Albumin_and_Globulin_Ratio']])

# Encode the categorical 'Gender' column
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])

# Split data into features and target
X = df.drop('Dataset', axis=1)
y = df['Dataset']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define parameter grids for Random Forest and Gradient Boosting
param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

param_grid_gb = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform GridSearchCV for Random Forest
grid_search_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=5, n_jobs=-1, scoring='accuracy')
grid_search_rf.fit(X_train, y_train)

# Perform GridSearchCV for Gradient Boosting
grid_search_gb = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid_gb, cv=5, n_jobs=-1, scoring='accuracy')
grid_search_gb.fit(X_train, y_train)

# Get the best models from GridSearchCV
best_rf = grid_search_rf.best_estimator_
best_gb = grid_search_gb.best_estimator_

# Define the meta-model
meta_model = LogisticRegression()

# Define the stacking model with the tuned base models
stacking_model_tuned = StackingClassifier(
    estimators=[('rf', best_rf), ('gb', best_gb)],
    final_estimator=meta_model,
    cv=5
)

# Train the tuned stacking model
stacking_model_tuned.fit(X_train, y_train)
folder_path = 'mlmodel/Liver Disease/saved_models'
os.makedirs(folder_path, exist_ok=True)  # Create the folder if it doesn't exist

# Define the file name and path for the pickle file
model_file_path = os.path.join(folder_path, 'liver_stacking_model.pkl')
##m_file_path = os.path.join(folder_path,'stack.pkl')

# Save the trained stacking model as a pickle file
with open(model_file_path, 'wb') as model_file:
    pickle.dump(stacking_model_tuned, model_file)

print(f'Model saved at: {model_file_path}')

scaler_file_path = os.path.join(folder_path, 'scaler.pkl')
with open(scaler_file_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
# Make predictions and calculate the accuracy
y_pred_tuned = stacking_model_tuned.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)

print(f"Tuned Model Accuracy: {accuracy_tuned:.4f}")


Model saved at: mlmodel/Liver Disease/saved_models\liver_stacking_model.pkl
Tuned Model Accuracy: 0.7607


In [25]:

with open("D:/Projects/LLM Models/savedmodels/liver_stacking_model.pkl", 'rb') as model_file:
    loaded_stacking_model = pickle.load(model_file)# Prediction Code
with open("D:/Projects/LLM Models/mlmodel/Liver Disease/mlmodel/Liver Disease/saved_models/scaler.pkl", 'rb') as model_file:
    scaler = pickle.load(model_file)# Prediction Code
def predict_liver_disease(input_data):
    # Convert input data to a numpy array
    input_data = np.array(input_data).reshape(1, -1)
    # Standardize the input data
    input_data = scaler.transform(input_data)
    
    # Predict using the stacking model
    prediction = loaded_stacking_model.predict(input_data)
    
    # Return the prediction
    return 'Liver Disease' if prediction[0] == 1 else 'No Liver Disease'

# Example usage:
# input_data = [Age, Gender (0 for Female, 1 for Male), Total_Bilirubin, Direct_Bilirubin, 
#               Alkaline_Phosphotase, Alamine_Aminotransferase, Aspartate_Aminotransferase, 
#               Total_Protiens, Albumin, Albumin_and_Globulin_Ratio]
example_input = [30, 1, 0.20, 0.10, 150, 21.0, 11.0, 7.00, 4.00, 1.33]
prediction = predict_liver_disease(example_input)
print(prediction)

Liver Disease




In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Paths to model and label encoder files
model_paths = {
    "anemia": 'D:/Projects/LLM Models/savedmodels/anemia_stacking_model.pkl',
    "cirrhosis": "D:/Projects/LLM Models/savedmodels/cirrhosis_pipeline_model.pkl",
    "hepatitis": "D:/Projects/LLM Models/savedmodels/hepatitis_stacking_model.pkl",
    "liver": "D:/Projects/LLM Models/savedmodels/liver_stacking_model.pkl",
    "kidney": "D:/Projects/LLM Models/savedmodels/kidney_disease_model.pkl",
}
label_encoder_kidney = "D:/Projects/LLM Models/savedmodels/label_encoders.pkl"
gender = "Female"
age = 87
blood_test_results={'haemoglobin': 10.0, 'PCV': 0, 'RBC': 4.7, 'MCV': 95.7, 'MCH': 21.3, 'MCHC': 22.2, 'RDW': 0, 'platelet': 2.8, 'neutrophils': 55.0, 'lymphocytes': 0, 'eosinophils': 2.0, 'monocytes': 3.0, 'basophils': 0.0, 'nlr': 0}
liver_function_test_results={'bilirubin_total': 0, 'bilirubin_direct': 0, 'bilirubin_indirect': 0, 'ALT': 0, 'AST': 0, 'Alk': 139.0, 'Protein': 0, 'Albumin': 4.0, 'Globulin': 2.9, 'ag': 1.38}
kidney_test_results = {'creatinine': 0.9, 'urea': 22.0, 'blood_urea': 0, 'calcium': 9.0, 'phosphorus': 0, 'sodium': 132.0, 'potassium': 3.9, 'chloride': 0}

# Load models and label encoder
with open(model_paths["anemia"], 'rb') as file:
    anemia_model = pickle.load(file)
with open(model_paths["cirrhosis"], 'rb') as file:
    cirrhosis_model = pickle.load(file)
with open(model_paths["hepatitis"], 'rb') as file:
    hepatitis_model = pickle.load(file)
with open(model_paths["liver"], 'rb') as file:
    liver_model = pickle.load(file)
with open(model_paths["kidney"], 'rb') as file:
    kidney_model = pickle.load(file)
with open(label_encoder_kidney, 'rb') as file:
    kidney_label_encoder = pickle.load(file)

# Define gender value
gender = "Male"  # Example value; should be set dynamically
if gender=="Male" or gender=="M":
        gender_value = 1
else:
        gender_value = 0

# Predicting Anemia
def predict_anemia(input_data):
    input_data = np.array(input_data).reshape(1, -1)
    prediction = anemia_model.predict(input_data)
    return 'Anemia' if prediction[0] == 1 else 'No Anemia'


anemia_data = [gender_value, blood_test_results['haemoglobin'], blood_test_results['MCH'], blood_test_results['MCHC'], blood_test_results['MCV']]
anemia_disease = predict_anemia(anemia_data)

# Predicting Kidney Disease
def predict_kidney_disease(new_data):
    # Convert the input data to a DataFrame if it's not already one
    if isinstance(new_data, dict):
        new_data = pd.DataFrame([new_data])
    
    # Handle missing values
    new_data[numerical_cols] = new_data[numerical_cols].fillna(new_data[numerical_cols].median())
    
    # Fill and encode categorical variables
    for col in categorical_cols:
        if col in new_data:
            new_data[col] = new_data[col].fillna(new_data[col].mode().iloc[0])
            if col in kidney_label_encoder:
                new_data[col] = new_data[col].apply(lambda x: kidney_label_encoder[col].transform([x])[0]
                                                    if x in kidney_label_encoder[col].classes_ else -1)
    
    # Ensure the order of columns matches the training data
    required_columns = numerical_cols + categorical_cols
    new_data = new_data[required_columns]
    
    # Make a prediction using the trained model
    prediction = kidney_model.predict(new_data)
    
    # Decode the prediction if necessary
    if 'classification' in kidney_label_encoder:
        predicted_class = kidney_label_encoder['classification'].inverse_transform(prediction)
        return predicted_class[0]
    else:
        return prediction[0]

categorical_cols = ['anemia']
numerical_cols = ['age', 'albumin', 'blood urea', 'Creatinine', 'sodium', 'potassium', 'hemoglobin', 'wbc count', 'rbc count']

anemia_value = 'no' if anemia_disease == "No Anemia" else 'yes'
kidney_data = {
    'age': 30,  # Example value; should be set dynamically
    'albumin': liver_function_test_results['Albumin'],
    'blood urea': kidney_test_results['blood_urea'],
    'Creatinine': kidney_test_results['creatinine'],
    'sodium': kidney_test_results['sodium'],
    'potassium': kidney_test_results['potassium'],
    'hemoglobin': blood_test_results['haemoglobin'],
    'wbc count': blood_test_results['neutrophils'] + blood_test_results['lymphocytes'] + blood_test_results['eosinophils'] + blood_test_results['basophils'] + blood_test_results['monocytes'],
    'rbc count': blood_test_results['RBC'],
    'anemia': anemia_value
}
kidney_disease = predict_kidney_disease(kidney_data)

# Predicting Hepatitis
def predict_hepatitis_disease(input_data):
    input_data = np.array(input_data).reshape(1, -1)
    prediction = hepatitis_model.predict(input_data)
    disease_map = {
        0: 'No Disease',
        1: 'Hepatitis',
        2: 'Fibrosis',
        3: 'Cirrhosis',
        4: 'Blood Donor'
    }
    return disease_map.get(prediction[0], 'Unknown')

hepatitis_data = [age, gender_value, liver_function_test_results['Alk'], liver_function_test_results['ALT'], liver_function_test_results['AST'],kidney_test_results['creatinine']]
hepatitis_disease = predict_hepatitis_disease(hepatitis_data)

# Predicting Cirrhosis
def predict_cirrhosis(input_data):
    input_data = np.array(input_data).reshape(1, -1)
    prediction = cirrhosis_model.predict(input_data)
    return prediction[0]

hepatitis_value = 1 if hepatitis_disease == "Hepatitis" else 0
cirrhosis_data = pd.DataFrame({
    'Age': [age],  # Example value; should be set dynamically
    'Gender': [gender_value],  # 0 for male, 1 for female
    'Hepatitis_C_infection': [hepatitis_value],  # 0 for negative, 1 for positive
    'Total Bilirubin(mg/dl)': [liver_function_test_results['bilirubin_total']],
    'Direct(mg/dl)': [liver_function_test_results['bilirubin_direct']],
    'Indirect(mg/dl)': [liver_function_test_results['bilirubin_indirect']],
    'Albumin(g/dl)': [liver_function_test_results['Albumin']],
    'Globulin(g/dl)': [liver_function_test_results['Globulin']],
    'A/G Ratio': [liver_function_test_results['ag']],
    'AL.Phosphatase(U/L)': [liver_function_test_results['Alk']],
    'SGOT/AST(U/L)': [liver_function_test_results['AST']],
    'SGPT/ALT(U/L)': [liver_function_test_results['ALT']]
})
cirrhosis_disease = predict_cirrhosis(cirrhosis_data)

# Predicting Liver Disease
with open("D:/Projects/LLM Models/mlmodel/Liver Disease/mlmodel/Liver Disease/saved_models/scaler.pkl", 'rb') as model_file:
    scaler = pickle.load(model_file)# Prediction Code
def predict_liver_disease(input_data):
    input_data = np.array(input_data).reshape(1, -1)
    input_data = scaler.transform(input_data)
    prediction = liver_model.predict(input_data)
    return 'Liver Disease' if prediction[0] == 1 else 'No Liver Disease'

liver_data = [age, gender_value, liver_function_test_results['bilirubin_total'], liver_function_test_results['bilirubin_direct'], liver_function_test_results['Alk'], liver_function_test_results['ALT'], liver_function_test_results['AST'], 7.00, liver_function_test_results['Albumin'], liver_function_test_results['ag']]  # Example values; should be set dynamically
liver_disease = predict_liver_disease(liver_data)

# Print results
print(f"Anemia Disease: {anemia_disease}")
print(f"Kidney Disease: {kidney_disease}")
print(f"Hepatitis Disease: {hepatitis_disease}")
print(f"Cirrhosis Disease: {cirrhosis_disease}")
print(f"Liver Disease: {liver_disease}")


Anemia Disease: No Anemia
Kidney Disease: ckd
Hepatitis Disease: Hepatitis
Cirrhosis Disease: 1.0
Liver Disease: Liver Disease




In [12]:
import os
import pickle

# Get the current working directory
current_dir = os.getcwd()

# Construct the relative path to the pickle file
relative_path = os.path.join('savedmodels', 'anemia_stacking_model.pkl')
model_path = os.path.join(current_dir, relative_path)

model_path

'd:\\Projects\\LLM Models\\mlmodel\\Liver Disease\\savedmodels\\anemia_stacking_model.pkl'