# **Importing libraries**

In [None]:
# Standard libraries
import os

# Third-party libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score



In [None]:
pip install gradio

In [None]:
import gradio as gr

# Loading data

In [None]:
# Read each CSV of the Kaggle "hospital" dataset into its own DataFrame.
# The names on the left pair up positionally with the paths on the right.
(
    data_training,
    data_test,
    data_dictionary,
    data_sample_submission,
    data_meta,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/hospital/Dataset/training_data.csv",
        "/kaggle/input/hospital/Dataset/test_data.csv",
        "/kaggle/input/hospital/Dataset/data_dictionary.csv",
        "/kaggle/input/hospital/Dataset/sample_submission.csv",
        "/kaggle/input/hospital/Dataset/metadata.csv",
    )
)


## Cleaning the data

In [None]:
def clean_data(data):
    """Return a model-ready, integer-encoded copy of a hospital-stay frame.

    Rows containing any missing value are dropped, then these columns are
    re-encoded:

    * ``gender``: ``"M"`` -> 0, ``"F"`` -> 1
    * ``facid``: ``"A"``..``"E"`` -> 0..4
    * every boolean flag column (``hemo``, ``pneum``, ``asthma``, ...):
      ``False`` -> 0, ``True`` -> 1

    A column that already holds numbers is left untouched, so calling the
    function again on its own output (e.g. when a notebook cell is re-run)
    returns the same values instead of re-mapping them.

    Args:
        data: DataFrame containing at least the columns listed above.
            It is not modified.

    Returns:
        A new DataFrame with the incomplete rows removed and the columns
        above encoded as integers. Values that have no entry in a mapping
        become NaN, as ``Series.map`` does for unmapped keys.

    Raises:
        KeyError: if one of the expected columns is absent.
    """
    category_codes = {
        "gender": {"M": 0, "F": 1},
        "facid": {"A": 0, "B": 1, "C": 2, "D": 3, "E": 4},
    }
    flag_codes = {False: 0, True: 1}
    flag_columns = (
        "hemo",
        "psychologicaldisordermajor",
        "pneum",
        "dialysisrenalendstage",
        "asthma",
        "irondef",
        "substancedependence",
        "depress",
        "psychother",
        "fibrosisandother",
        "malnutrition",
    )

    # Work on an explicit copy so the caller's frame is never altered and the
    # column assignments below cannot write through to it.
    cleaned = data.dropna().copy()

    for column, codes in category_codes.items():
        # Only text columns need encoding; numeric ones are already encoded.
        if not pd.api.types.is_numeric_dtype(cleaned[column]):
            cleaned[column] = cleaned[column].map(codes)

    for column in flag_columns:
        values = cleaned[column]
        # is_numeric_dtype() also reports True for bool, so bool is tested
        # explicitly; object columns holding True/False are covered by the
        # "not numeric" half of the condition.
        if pd.api.types.is_bool_dtype(values) or not pd.api.types.is_numeric_dtype(values):
            cleaned[column] = values.map(flag_codes)

    return cleaned



In [None]:
# Lift the column limit so displayed frames show every column.
pd.options.display.max_columns = None

# Run the training frame through the shared cleaning step.
cleaned_data = clean_data(data_training)

## Splitting the training data set into two sets

In [None]:

# Target: the length of stay. Features: every other column except "vdate".
y = cleaned_data["lengthofstay"]
x = cleaned_data.drop(columns=["lengthofstay", "vdate"])

# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
#cleaned_data.isna().sum()

# Create the model

In [None]:
# Random forest with 100 trees; the fixed seed keeps training repeatable.
model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)

# Training the model

In [None]:
# Fit the forest on the training portion of the split.
model.fit(X=x_train, y=y_train)

## Apply the model to make predictions

In [None]:
# Predict the length of stay for the held-out rows.
y_pred = model.predict(X=x_test)

In [None]:
# Average predicted length of stay over the held-out rows.
y_pred.mean()

## Evaluate the model performance

In [None]:
# Score the held-out predictions: mean squared error and R^2.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)



In [None]:
# Tabulate the evaluation metrics. The first score is the value returned by
# mean_squared_error, so it is labelled "MSE".
performance_data = {
    "Metric": ["MSE", "R2"],
    "Score": [mse, r2],
}

performance_df = pd.DataFrame(performance_data)

# to_string(index=False) prints the table without the row-number column.
print(performance_df.to_string(index=False))

In [None]:
# Preview the first five rows of the training frame.
data_training.head(n=5)

# Create a Gradio web app

In [None]:

import warnings

# Silence UserWarning messages raised from Gradio. The ``module`` argument is
# a regular expression matched against the start of the name of the module
# that triggers the warning, so "gradio" also covers its submodules.
warnings.filterwarnings("ignore", category=UserWarning, module="gradio")

import random

def generate_random_id():
    """Return a pseudo-random integer identifier between 100 and 3000 inclusive."""
    # randrange excludes its upper bound, hence 3001 to keep 3000 reachable.
    return random.randrange(100, 3001)


# Fallback values for the 20 model features that the web form does not ask
# for. The "id" entry is drawn once, when this cell runs.
# NOTE(review): the pulse (6.5) and respiration (1) defaults look unusually
# low - confirm them against the training data.
default_values = dict(
    id=generate_random_id(),
    rcount=2,
    dialysisrenalendstage=False,
    pneum=False,
    substancedependence=False,
    psychologicaldisordermajor=True,
    psychother=True,
    fibrosisandother=False,
    malnutrition=False,
    hemo=False,
    hematocrit=16,
    neutrophils=7,
    sodium=136,
    glucose=147,
    bloodureanitro=20,
    creatinine=0.9,
    pulse=6.5,
    respiration=1,
    secondarydiagnosisnonicd9=1,
    facid="B",
)

# Input components for the five features the user supplies. Their order must
# match the positional parameters of the prediction callback:
# bmi, gender, depress, asthma, irondef.
# The top-level gr.Textbox / gr.Radio classes are used because the older
# gr.inputs.* namespace is deprecated and absent from current Gradio releases.
user_input_features = [
    gr.Textbox(label="What is your BMI?"),
    gr.Radio(choices=["M", "F"], label="What is your gender?"),
    gr.Radio(choices=["True", "False"], label="Do you have depression?"),
    gr.Radio(choices=["True", "False"], label="Do you have asthma?"),
    gr.Radio(choices=["True", "False"], label="Do you have iron deficiency?"),
]


def predict_some(bmi, gender, depress, asthma, irondef):
    """Predict the length of a hospital stay from the five web-form answers.

    The features the form does not ask for are taken from ``default_values``
    and the row gets a freshly generated ``id``. The resulting single-row
    frame is passed through ``clean_data`` and then to ``model.predict``.

    Args:
        bmi: Body-mass index as entered by the user; must parse as a float.
        gender: ``"M"`` or ``"F"``.
        depress: ``"True"`` or ``"False"``; any other value counts as False.
        asthma: ``"True"`` or ``"False"``; any other value counts as False.
        irondef: ``"True"`` or ``"False"``; any other value counts as False.

    Returns:
        The rounded prediction followed by ``" days"``, e.g. ``"4.0 days"``.

    Raises:
        ValueError: if ``bmi`` is not a number or ``gender`` is not
            ``"M"``/``"F"``.
    """
    # Validate up front: a blank BMI or an unselected gender would otherwise
    # fail later with an error that does not point at the offending field.
    try:
        bmi_value = float(bmi)
    except (TypeError, ValueError) as err:
        raise ValueError(f"BMI must be a number, got {bmi!r}") from err
    if gender not in ("M", "F"):
        raise ValueError(f"Gender must be 'M' or 'F', got {gender!r}")

    # Values supplied by this request. The radio buttons deliver the strings
    # "True"/"False", which are converted to real booleans here.
    answers = {
        "id": generate_random_id(),
        "gender": gender,
        "asthma": asthma == "True",
        "irondef": irondef == "True",
        "depress": depress == "True",
        "bmi": bmi_value,
    }

    # Column order of the frame handed to the model.
    # NOTE(review): presumably the column order of the training features -
    # verify against the frame the model was fitted on.
    feature_order = (
        "id",
        "rcount",
        "gender",
        "dialysisrenalendstage",
        "asthma",
        "irondef",
        "pneum",
        "substancedependence",
        "psychologicaldisordermajor",
        "depress",
        "psychother",
        "fibrosisandother",
        "malnutrition",
        "hemo",
        "hematocrit",
        "neutrophils",
        "sodium",
        "glucose",
        "bloodureanitro",
        "creatinine",
        "bmi",
        "pulse",
        "respiration",
        "secondarydiagnosisnonicd9",
        "facid",
    )

    input_data = {}
    for feature in feature_order:
        value = answers.get(feature)
        # Only a genuinely missing value falls back to the default; False and
        # 0 are legitimate answers and must be kept.
        if value is None:
            value = default_values[feature]
        input_data[feature] = value

    # One-row frame, encoded the same way as the training data.
    df = pd.DataFrame(input_data, index=[0])
    cleaned_input_data = clean_data(df)

    prediction = model.predict(cleaned_input_data)
    result = str(np.round(prediction)[0])
    return result + " days"
    


# Build the Gradio interface: the form inputs are passed positionally to the
# prediction callback and its return value is shown as plain text.
iface = gr.Interface(
    fn=predict_some,
    inputs=user_input_features,
    outputs="text",
    title="Predict Assistant",
    description="Answer some questions and get prediction on the length of your stay in the hospital.",
)

# Start serving the interface.
iface.launch()


# Applying the trained model on a new and unseen data set

In [None]:
# Apply the same cleaning step to the unseen test frame, then preview the
# first five rows of the result.
cleaned_new_data = clean_data(data_test)
cleaned_new_data.head(n=5)

In [None]:
# cleaned_new_data.isna().sum()

In [None]:
# Predict the length of stay for every cleaned test row.
# NOTE(review): this assumes the test frame has exactly the feature columns
# the model was fitted on - confirm against the test CSV.
predictions = model.predict(cleaned_new_data)

# Pair each prediction with its row id. The array is matched to the "id"
# column by position.
df = pd.DataFrame(
    {
        "id": cleaned_new_data["id"],
        "lengthofstay": predictions,
    }
)

# index=False is the documented way to leave the DataFrame index out of the
# CSV, so the file holds only the two columns above.
df.to_csv("submission.csv", index=False)

In [None]:
# Read the written submission back from the Kaggle working directory and
# preview its first five rows as a sanity check.
restult = pd.read_csv("/kaggle/working/submission.csv")
restult.head(n=5)
