In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# linear algebra
# data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Standard libraries

import os

# Recursively print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        input_file_path = os.path.join(dirname, filename)
        print(input_file_path)
        
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Third-party libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
!pwd

In [None]:
!which python

# Loading the data

In [None]:
# Every CSV used in this notebook is read from the same dataset folder,
# so the folder is named once and each file path is built from it.
DATASET_DIR = "/kaggle/input/hospital/Dataset"

data_training = pd.read_csv(f"{DATASET_DIR}/training_data.csv")
data_test = pd.read_csv(f"{DATASET_DIR}/test_data.csv")
data_dictionary = pd.read_csv(f"{DATASET_DIR}/data_dictionary.csv")
data_sample_submission = pd.read_csv(f"{DATASET_DIR}/sample_submission.csv")
data_meta = pd.read_csv(f"{DATASET_DIR}/metadata.csv")

In [None]:
data_training.head(5)

# Inspect data

When loading tabular data as a Pandas dataframe, your first step will typically be to inspect the data using the `head`, `info`, and `describe` methods. That will tell you a lot about the data set: what the features are, whether you can understand the features and the problem without further information, how much the feature and label values vary, whether there are obvious missing values, and so on.

In [None]:
data_training.head()

In [None]:
data_training.info()

In [None]:
data_training.describe(include='all')

In [None]:
data_training.lengthofstay.unique()

In [None]:
data_test.info()

In [None]:
data_test.describe(include='all')

# Prepare the data

In [None]:
data_training.shape

In [None]:
data_test.shape

In [None]:
# Replace the text codes in 'gender' and 'facid' with integer codes.
# Series.map turns any value that is not a key of the mapping into NaN, so
# re-running this cell on already-encoded data would blank these two columns.
GENDER_CODES = {'M': 1, 'F': 2}
FACILITY_CODES = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5}
data_training['gender'] = data_training['gender'].map(GENDER_CODES)
data_training['facid'] = data_training['facid'].map(FACILITY_CODES)

# Encode each True/False flag column as 1/0 using one shared mapping.
TRAINING_FLAG_COLUMNS = [
    'psychologicaldisordermajor',
    'pneum',
    'dialysisrenalendstage',
    'asthma',
    'irondef',
    'substancedependence',
    'depress',
    'psychother',
    'fibrosisandother',
    'malnutrition',
    'hemo',
]
for flag_column in TRAINING_FLAG_COLUMNS:
    data_training[flag_column] = data_training[flag_column].map({False: 0, True: 1})

In [None]:
pd.set_option('display.max_columns', None)
data_training.head(3)

In [None]:
data_training.dropna(inplace=True)

In [None]:
data_training.isna().sum()

# Explore the data

Exploring data is an important step of any machine learning project. You'll learn much more about it in Assignment 1. Here's a short list of things to think about:

- Explore the class labels
- Explore correlations between the features, and between features and label.
- Look for missing values
- Look for outliers
-  ...

## Look for missing values

Missing values are often coded using special values like "-1" or strings like "unknown" or "N/A". Those aren't necessarily picked up by looking for missing values in the output of the info method.

In [None]:
# A demo of the kind of simple checks one can do to look for missing values
# that are coded as special values rather than as NaN.
#   * non-numeric columns: print the distinct values, so placeholder strings
#     stand out;
#   * numeric columns: print the maximum and minimum, so sentinel numbers
#     show up as extremes.
# The branch is chosen from the column's dtype. (The previous version compared
# the dtype with the string 'lengthofstay', which is a column name and never a
# dtype, so the distinct-values branch could not run.)
for col in data_training.columns:
    column_values = data_training[col]
    print(col)
    if pd.api.types.is_numeric_dtype(column_values):
        print(np.max(column_values), np.min(column_values))
    else:
        print(column_values.unique())
    print("#"*40)

# Test data preparation

In [None]:
data_test.head(2)

In [None]:
# Encode the True/False flag columns of the test set as 1/0.
# NOTE(review): the training cell also maps 'gender', 'facid' and 'hemo', but
# those three mappings are disabled for the test set. Confirm that the test CSV
# already stores them in the same numeric form as the encoded training data;
# otherwise the model will see different encodings at fit and predict time.
TEST_FLAG_COLUMNS = [
    'psychologicaldisordermajor',
    'pneum',
    'dialysisrenalendstage',
    'asthma',
    'irondef',
    'substancedependence',
    'depress',
    'psychother',
    'fibrosisandother',
    'malnutrition',
]
for flag_column in TEST_FLAG_COLUMNS:
    data_test[flag_column] = data_test[flag_column].map({False: 0, True: 1})

In [None]:
data_test.head(2)

In [None]:
# Remove, in place, every test row that contains at least one missing value.
# NOTE(review): rows dropped here get no prediction, so a submission built
# from data_test will not contain their ids — confirm this is acceptable.
data_test.dropna(inplace=True)

In [None]:
data_test.isna().sum()

In [None]:
# Target is the 'lengthofstay' column; the features are every remaining
# column except 'vdate'.
columns_excluded_from_features = ['lengthofstay', "vdate"]
y_train = data_training['lengthofstay']
x_train = data_training.drop(columns=columns_excluded_from_features)

In [None]:
x_train.isna().sum()

# Creating the model

In [None]:
model = LinearRegression()

# Training model

In [None]:
# Here we will write code to train the model
model.fit(x_train,y_train)

In [None]:
data_test.head()

# Apply model

In [None]:
data_test.shape

In [None]:
# Make the predictions for the test set.
# The model was fitted on x_train, so it is given exactly those columns, in the
# same order. Passing the whole of data_test would also hand it any column that
# was excluded from x_train.
# NOTE(review): assumes data_test contains every column of x_train — confirm.
y_pred = model.predict(data_test[x_train.columns])

In [None]:
df = pd.DataFrame(y_pred).describe()

# Save model

In [None]:
joblib.dump(model, 'mymodel.pkl')


# Prepare the data for the machine learning models you plan to use

Many models depend on having the feature values on the same scale (we'll understand what characterises these later in the course). We typically have to perform some feature scaling to get them on similar scales. It's not necessary for tree-based models like random forests and the like (we'll understand why when we look into details of how these work).

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Explicit list of the model's input columns, used for both the training and
# the test frame so the two feature matrices share columns and column order.
features = [
    "rcount",
    "gender",
    "dialysisrenalendstage",
    "asthma",
    "irondef",
    "pneum",
    "substancedependence",
    "psychologicaldisordermajor",
    "depress",
    "psychother",
    "fibrosisandother",
    "malnutrition",
    "hemo",
    "hematocrit",
    "neutrophils",
    "sodium",
    "glucose",
    "bloodureanitro",
    "creatinine",
    "bmi",
    "pulse",
    "respiration",
    "secondarydiagnosisnonicd9",
    "facid",
]

Y_train = data_training["lengthofstay"]
X_train = data_training[features]
X_test = data_test[features]

In [None]:
# X / y are the training features and training target used by the scaling and
# model cells below. X must stay the training matrix: it was previously
# overwritten with X_test right after being set, which made the scaler fit on
# test data and left X and y with different numbers of rows.
X = X_train
y = Y_train

In [None]:
# Fit a new linear regression on the selected training features (fit returns
# the estimator itself) and predict the target for the test features.
model = LinearRegression().fit(X_train, Y_train)
predictions = model.predict(X_test)

In [None]:
std = StandardScaler()
std.fit(X)

In [None]:
X_std = std.transform(X)

In [None]:
type(X_std)

In [None]:
X_std = pd.DataFrame(data=X_std, columns = X.columns)

In [None]:
X.head()

In [None]:
X_std.head()

# Evaluation setup

We'll drop this here, but as mentioned in the lecture, you should in general design a good evaluation setup for your models. You don't want to have the Kaggle test set as your only way to estimate generalisation performance. We'll learn more about evaluating classifiers later in this module.

# Machine learning models

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [None]:
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()

In [None]:
#rf.fit(X, y)

In [None]:
#gb.fit(X, y)

# Submission

After training your model (or models) and producing predictions for the test data, you must construct a CSV file in the correct form that Kaggle can use to compute a score. It should have the same form as the file `sample_submission.csv`:

In [None]:
# Start the submission from the test ids. An explicit copy is taken so that the
# prediction column added in the next cell is written to an independent frame
# rather than to a slice of data_test (which triggers SettingWithCopyWarning).
submission = data_test[["id"]].copy()
submission

In [None]:
submission["lengthofstay"] = predictions

In [None]:
submission.head()

In [None]:
submission

In [None]:
# Write the submission without the row index; `index` is documented as a bool,
# so pass False explicitly instead of relying on None being falsy.
submission.to_csv("submission.csv", index=False)

In [None]:
pd.read_csv("submission.csv")

# GRADIO start


In [None]:
pip install gradio 

In [None]:
import gradio as gr
import pandas as pd
import joblib

In [None]:
'''def load_and_display_csv(upload, delimeter=";", header=None, names=["Id", "lengthofstay"], encodings="UTF-8"):
    
#df = pd.read_csv(upload, delimeter=";")
#df = pd.read_csv(upload, header=None, names=["Id", "lengthofstay"])
#df = pd.read_csv(upload, encodings="UTF-8")
    
    if upload is not None:
        try:
            df = pd.read_csv(upload)
            return df.to_html()
        except Exception as e:
            return str(e)
        
        else:
            return "Upload a CSV file."
'''

In [None]:
'''def predict_length_of_stay(input_data):
    # Preprocess input_data as needed
    prediction = model.predict(input_data)
    return prediction
'''

# Gradio interface

In [None]:
'''interface = gr.Interface(
    fn = load_and_display_csv,
    inputs = gr.inputs.File(label="Upload a CSV file"),
    outputs = gr.outputs.HTML(),
    title = "CSV File Viewer",
    description = "Upload a CSV file to view its content",
)
'''

# Launch Gradio Interface

In [None]:
import gradio as gr
import pandas as pd
import joblib  # or any other library for loading your machine learning model
import re  # Import the regular expression library

# Load the model that an earlier cell saved with joblib.dump(model, 'mymodel.pkl').
# NOTE(review): that cell writes to a relative path, so this absolute path only
# points at the same file when the working directory is /kaggle/working — confirm
# before running outside Kaggle.
# Security: joblib.load unpickles the file and can therefore execute arbitrary
# code; only load model files you created yourself.
model = joblib.load('/kaggle/working/mymodel.pkl')

# Regex for a positive integer written in digits only: optional leading zeros,
# then at least one non-zero digit, then any digits. Unlike the previous
# pattern this rejects "0" (and "00", ...), which is not a positive integer.
positive_integer_pattern = r"^0*[1-9]\d*$"

# Create a function to predict length of stay based on "id"
def predict_length_of_stay(input_id):
    """Return a message with the predicted length of stay for one patient id.

    The id is validated against ``positive_integer_pattern``, the matching row
    is looked up in ``data_test``, and that row's feature values are passed to
    the loaded ``model``. (Previously a frame holding only the ``id`` column was
    passed to the model, which does not match the columns it was fitted on.)

    Parameters
    ----------
    input_id : str
        Text typed by the user; expected to be a positive integer.

    Returns
    -------
    str
        The prediction message, a validation / "not found" message, or the
        text of any exception raised along the way. The function never raises,
        so the Gradio UI always receives a string.
    """
    try:
        # Check if the input "id" matches the regex pattern for a positive integer
        if not re.match(positive_integer_pattern, input_id):
            return f"Invalid input. Please enter a positive integer for 'id'. (Input: {input_id})"
        patient_id = int(input_id)

        # Find the prepared test row that belongs to this id.
        matching_rows = data_test[data_test['id'] == patient_id]
        if matching_rows.empty:
            return f"No patient with ID {patient_id} found in the test data."
        input_data = matching_rows.head(1)

        # Estimators fitted on a DataFrame record their column names; when they
        # are available, pass exactly those columns in the fitted order.
        # NOTE(review): assumes data_test contains all of those columns — confirm.
        feature_names = getattr(model, 'feature_names_in_', None)
        if feature_names is not None:
            input_data = input_data[list(feature_names)]

        # Use the loaded model to make the prediction
        prediction = model.predict(input_data)

        return f"Predicted Length of Stay for ID {patient_id}: {prediction[0]} days"
    except Exception as e:
        # Deliberate best effort: show the error text in the UI instead of crashing it.
        return str(e)

# Create a Gradio interface: one text box as input, plain text as output.
# The input uses the top-level gr.Textbox component (its default type is plain
# text); the legacy gr.inputs namespace is deprecated in Gradio 3 and no longer
# exists in Gradio 4+, which an unpinned install will pull in.
interface = gr.Interface(
    fn=predict_length_of_stay,
    inputs=gr.Textbox(label="Enter ID (Positive Integer)"),
    outputs="text",
    title="Patient Length of Stay Predictor",
    description="Enter an 'id' (positive integer) to predict length of stay.",
)

In [None]:
interface.launch(inline=True)