In [5]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import pickle
import os
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Load the dataset
# NOTE(review): absolute, machine-specific path; adjust it to where anemia.csv lives locally.
data = pd.read_csv('D:/Projects/LLM Models/mlmodel/Liver Disease/anemia.csv')

# Step 3: Data Preprocessing
# Inspect the data
print(data.head())
# DataFrame.info() prints its summary itself and returns None, so call it directly;
# wrapping it in print() emitted an extra "None" line after the summary.
data.info()

# Handle missing values (if any) by dropping incomplete rows
data = data.dropna()

# Features are every column except the target column 'Result'.
# NOTE(review): the data preview shows 'Result' as an integer 0/1 column, not a
# continuous value, so the regressors trained below output a continuous score
# for a binary label.
X = data.drop(columns=['Result'])
y = data['Result']

# Encode categorical variables if necessary
# X = pd.get_dummies(X, drop_first=True)

# Step 4: Split the data into training and testing sets
# 70% train / 30% test; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 5: Build base models for regression
# The tree-based estimators use randomness while fitting, so each one gets a
# fixed random_state (same seed as the train/test split). Without it, every run
# produces a slightly different model and different metrics.
base_models = [
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('dt', DecisionTreeRegressor(random_state=42)),
    ('svr', SVR())
]

# Step 6: Stacking
# Meta-model for regression: combines the base models' predictions.
# Seeded for the same reproducibility reason as the base models.
meta_model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Stacking regressor: the meta-model is trained on 5-fold cross-validated
# predictions of the base models.
stacking_model = StackingRegressor(estimators=base_models, final_estimator=meta_model, cv=5)

# Step 7: Train and evaluate the model
# Fit the stacked ensemble on the training split.
stacking_model.fit(X_train, y_train)

# Persist the fitted model as a pickle; the target folder is created on demand.
folder_path = 'mlmodel/Liver Disease/saved_models'
os.makedirs(folder_path, exist_ok=True)
model_file_path = os.path.join(folder_path, 'anemia_stacking_model.pkl')
with open(model_file_path, 'wb') as model_file:
    pickle.dump(stacking_model, model_file)
print(f'Model saved at: {model_file_path}')

# Score the ensemble on the held-out test split.
y_pred = stacking_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}', f'R^2 Score: {r2:.2f}', sep='\n')

# Optional: Compare with individual models (not shown in original classification code)
# Every base estimator is fitted on its own and scored on the same test split,
# giving a per-model baseline next to the stacked result.
for name, model in base_models:
    # fit() returns the estimator itself, so fitting and predicting can be chained.
    model_pred = model.fit(X_train, y_train).predict(X_test)
    model_mse, model_r2 = mean_squared_error(y_test, model_pred), r2_score(y_test, model_pred)
    print(f'{name} - Mean Squared Error: {model_mse:.2f}, R^2 Score: {model_r2:.2f}')


   Gender  Hemoglobin   MCH  MCHC   MCV  Result
0       1        14.9  22.7  29.1  83.7       0
1       0        15.9  25.4  28.3  72.0       0
2       0         9.0  21.5  29.6  71.2       1
3       0        14.9  16.0  31.4  87.5       0
4       1        14.7  22.0  28.2  99.5       0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1421 entries, 0 to 1420
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Gender      1421 non-null   int64  
 1   Hemoglobin  1421 non-null   float64
 2   MCH         1421 non-null   float64
 3   MCHC        1421 non-null   float64
 4   MCV         1421 non-null   float64
 5   Result      1421 non-null   int64  
dtypes: float64(4), int64(2)
memory usage: 66.7 KB
None
Model saved at: mlmodel/Liver Disease/saved_models\anemia_stacking_model.pkl
Mean Squared Error: 0.00
R^2 Score: 1.00
lr - Mean Squared Error: 0.07, R^2 Score: 0.70
rf - Mean Squared Error: 0.00, R^2 Score: 1.00
dt - Mean 

In [None]:
# MCH: MCH is short for "mean corpuscular hemoglobin." It's the average amount in each of your red blood cells of a protein called hemoglobin, which carries oxygen around your body.
# MCHC: MCHC stands for mean corpuscular hemoglobin concentration. It's a measure of the average concentration of hemoglobin inside a single red blood cell.
# MCV: MCV stands for mean corpuscular volume. An MCV blood test measures the average size of your red blood cells.
# Result: 0 - not anemic, 1 - anemic



In [12]:
import numpy as np
import pandas as pd

# Requires `stacking_model`, the fitted stacking model produced by the training cell above.
# No scaler is involved: the model was trained on the raw, unscaled feature columns,
# so inputs must be passed in unscaled as well.

def predict_anemia(input_data, model=None, threshold=0.5):
    """
    Predicts anemia for a single sample.

    Parameters:
    input_data (list or array): Feature values for one sample, in the order
        [Gender, Hemoglobin, MCH, MCHC, MCV].
    model (optional): Fitted estimator exposing ``predict``. When omitted, the
        notebook-level ``stacking_model`` is used.
    threshold (float, optional): Cut-off applied to the predicted score; a score
        at or above it is reported as anemia. Defaults to 0.5.

    Returns:
    str: 'Anemia' if the predicted score reaches the threshold, otherwise 'No Anemia'.
    """
    if model is None:
        # Looked up at call time so the most recently trained model is used.
        model = stacking_model

    # The estimator expects a 2-D (n_samples, n_features) array; this is one sample.
    features = np.asarray(input_data).reshape(1, -1)

    # The stacking model is a regressor, so it returns a continuous score
    # (e.g. 0.98) rather than an exact class label. Testing `== 1` would report
    # 'No Anemia' for every score that is not exactly 1.0, so compare against a
    # threshold instead.
    score = model.predict(features)[0]
    return 'Anemia' if score >= threshold else 'No Anemia'

# Example usage:
# One sample, listed in the feature order used for training.
example_input = [
    1,     # Gender
    14.9,  # Hemoglobin
    22.7,  # MCH
    29.1,  # MCHC
    83.7,  # MCV
]
prediction = predict_anemia(example_input)
print(prediction)


No Anemia




In [6]:
# Restore the stacking model from the pickle written by the training cell.
# `model_file_path` is defined in that cell, so it must have been run first.
# NOTE: unpickling executes code stored in the file; only load model files you created yourself.
with open(model_file_path, mode='rb') as model_file:
    loaded_stacking_model = pickle.load(model_file)

# Function to predict anemia using the loaded model
def predict_anemia(input_data, model=None, threshold=0.5):
    """
    Predicts anemia for a single sample using the model loaded from disk.

    Parameters:
    input_data (list or array): Feature values for one sample, in the order
        [Gender, Hemoglobin, MCH, MCHC, MCV].
    model (optional): Fitted estimator exposing ``predict``. When omitted, the
        notebook-level ``loaded_stacking_model`` is used.
    threshold (float, optional): Cut-off applied to the predicted score; a score
        at or above it is reported as anemia. Defaults to 0.5.

    Returns:
    str: 'Anemia' if the predicted score reaches the threshold, otherwise 'No Anemia'.
    """
    if model is None:
        # Looked up at call time so the most recently loaded model is used.
        model = loaded_stacking_model

    # The estimator expects a 2-D (n_samples, n_features) array; this is one sample.
    features = np.asarray(input_data).reshape(1, -1)

    # The pickled model is a stacking regressor, so it returns a continuous
    # score (e.g. 0.98) rather than an exact class label. Testing `== 1` would
    # report 'No Anemia' for every score that is not exactly 1.0, so compare
    # against a threshold instead.
    score = model.predict(features)[0]
    return 'Anemia' if score >= threshold else 'No Anemia'

# Example usage:
# One sample, listed in the feature order used for training.
example_input = [
    1,     # Gender
    14.9,  # Hemoglobin
    22.7,  # MCH
    29.1,  # MCHC
    83.7,  # MCV
]
prediction = predict_anemia(example_input)
print(prediction)


No Anemia


