# RFC API Development


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, classification_report, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.impute import SimpleImputer

## 5.2 Load the model (Testing)

In [3]:
import pickle

# Base name that identifies the pickled model artifact
model_type = "RFC_model"

# The artifact filename is derived from model_type above
input_file = f"hospitalization-{model_type}.bin"

# The pickle holds a (transformers, model) pair, unpacked on load.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(input_file, mode="rb") as f_in:
    transformers_imported, model_imported = pickle.load(f_in)

# Show both loaded objects as the cell output
(transformers_imported, model_imported)

({'imputer': SimpleImputer(strategy='most_frequent'),
  'scaler': StandardScaler(),
  'transformer': PowerTransformer()},
 RandomForestClassifier(n_jobs=-1))

In [4]:
# Load the pickled sample dataframe and preview its first rows.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
input_file = "df_s-dataset.bin"
with open(input_file, mode="rb") as f_in:
    df_s_imported = pickle.load(f_in)

df_s_imported.head()

Unnamed: 0,pahosp1y,pahip50e,padadage,pamomage,pahibpe,parespe,pacholst,pagrossaa,paosleep,pafallinj,...,pastroke,pasmokev,pahearaid,pameds,paheight,paglasses,paeat,papaina,pagender,padrinkb
0,1.0,1.0,78.0,60.0,1.0,0.0,1.0,0.0,,1.0,...,0.0,1.0,0.0,0.0,1.73,0.0,,0.0,1.0,0.0
1,0.0,0.0,,70.0,0.0,0.0,1.0,0.0,,0.0,...,0.0,0.0,0.0,0.0,,0.0,,,2.0,0.0
2,0.0,0.0,83.0,32.0,0.0,0.0,0.0,0.0,,,...,0.0,1.0,0.0,0.0,,1.0,0.0,1.0,1.0,0.0
4,0.0,0.0,75.0,82.0,0.0,0.0,0.0,0.0,,0.0,...,0.0,1.0,0.0,0.0,1.77,0.0,,,1.0,1.0
6,0.0,0.0,78.0,85.0,1.0,0.0,0.0,1.0,,1.0,...,0.0,0.0,0.0,0.0,,0.0,0.0,,1.0,0.0


In [5]:
# Pull the three fitted preprocessing objects out of the unpickled dictionary
imputer_imported, scaler_imported, transformer_imported = (
    transformers_imported[step_name]
    for step_name in ("imputer", "scaler", "transformer")
)

In [6]:
def extract_patient(pos, df, target="pahosp1y"):
    """
    Split one row of the dataframe into its target value and its feature dictionary.

    Parameters:
    pos (int): Zero-based position (not index label) of the patient row in the dataframe.
    df (pandas.DataFrame): The dataframe containing patient information; it must
        include the target column.
    target (str): Name of the target column. Defaults to "pahosp1y".

    Returns:
    tuple: A tuple (y_patient, patient_dict) where y_patient is the value of the
        target column for that row and patient_dict maps every remaining column
        name to the row's value (a dict similar to the JSON payload of the API).

    Raises:
    KeyError: If the target column is not present in df.
    IndexError: If pos is out of bounds for df.
    """
    y_patient = df[target].iloc[pos]

    # Slice the single row first so that only that row, not the whole frame,
    # is copied when the target column is dropped.
    patient_row = df.iloc[[pos]].drop(columns=target)
    patient_dict = patient_row.iloc[0].to_dict()
    return y_patient, patient_dict

In [7]:
def predict_patient(patient_dict, transformers, model, threshold=0.5):
    """
    Predicts the hospitalization probability for a given patient.

    Parameters:
    - patient_dict (dict): A dictionary containing the patient's information
      (one entry per feature, in the column order the transformers expect).
    - transformers (dict): A dictionary with the keys "imputer", "scaler" and
      "transformer"; each value must expose a ``transform`` method. They are
      applied in that order.
    - model: The trained machine learning model; must expose ``predict_proba``.
    - threshold (float): Minimum predicted probability for the patient to be
      flagged as hospitalized. Defaults to 0.5.

    Returns:
    - hospitalization (bool): True if the predicted probability is at least
      ``threshold``, False otherwise.
    - y_patient_pred (float): The predicted probability of hospitalization for the patient
      (second column of ``predict_proba``).

    Raises:
    - KeyError: If one of the expected transformer keys is missing.
    """
    # Resolve every preprocessing step up front so a missing key fails
    # before any transformation is run.
    steps = [transformers[name] for name in ("imputer", "scaler", "transformer")]

    # Single-row frame; columns follow the key order of patient_dict and each
    # column keeps its own dtype.
    X_patient = pd.DataFrame([patient_dict])

    # Impute -> scale -> normalize, feeding each output into the next step
    for step in steps:
        X_patient = step.transform(X_patient)

    # Cast to native Python types so the result matches the documented
    # bool/float contract and can be serialized to JSON by an API layer.
    y_patient_pred = float(model.predict_proba(X_patient)[0, 1])
    hospitalization = bool(y_patient_pred >= threshold)

    return hospitalization, y_patient_pred

### Test model with a sample patient

In [10]:
# Name of the target column
target = "pahosp1y"

# Preview the first ten rows whose target value equals 1
df_s_imported.loc[df_s_imported[target].eq(1)].head(10)

Unnamed: 0,pahosp1y,pahip50e,padadage,pamomage,pahibpe,parespe,pacholst,pagrossaa,paosleep,pafallinj,...,pastroke,pasmokev,pahearaid,pameds,paheight,paglasses,paeat,papaina,pagender,padrinkb
0,1.0,1.0,78.0,60.0,1.0,0.0,1.0,0.0,,1.0,...,0.0,1.0,0.0,0.0,1.73,0.0,,0.0,1.0,0.0
18,1.0,0.0,70.0,,1.0,0.0,0.0,0.0,,1.0,...,1.0,1.0,0.0,0.0,,0.0,0.0,1.0,2.0,0.0
75,1.0,1.0,77.0,70.0,0.0,0.0,1.0,0.0,,,...,0.0,1.0,0.0,0.0,1.6,0.0,0.0,1.0,1.0,0.0
76,1.0,0.0,83.0,72.0,0.0,0.0,1.0,1.0,,1.0,...,0.0,0.0,0.0,0.0,1.3,0.0,0.0,0.0,2.0,0.0
78,1.0,1.0,,80.0,1.0,0.0,0.0,0.0,,0.0,...,0.0,0.0,0.0,0.0,,0.0,,,2.0,0.0
107,1.0,1.0,62.0,101.0,0.0,0.0,,,2.0,1.0,...,1.0,1.0,0.0,,1.52,0.0,0.0,,1.0,0.0
111,1.0,0.0,70.0,65.0,1.0,0.0,1.0,0.0,,0.0,...,0.0,1.0,0.0,0.0,1.65,0.0,0.0,,1.0,0.0
114,1.0,0.0,51.0,50.0,0.0,0.0,1.0,0.0,,,...,0.0,0.0,0.0,0.0,1.67,1.0,,0.0,1.0,0.0
145,1.0,0.0,64.0,88.0,1.0,0.0,1.0,0.0,,0.0,...,0.0,1.0,0.0,0.0,1.6,1.0,0.0,,1.0,0.0
162,1.0,1.0,,75.0,0.0,0.0,1.0,0.0,,1.0,...,0.0,0.0,0.0,0.0,,0.0,,1.0,1.0,0.0


In [13]:
idx = 78

# .loc[[idx]] (list of labels) always returns a DataFrame, whether the label is
# unique or repeated in the index; .iloc[0:1] then keeps only the first match.
patient_row = df_s_imported.loc[[idx]].iloc[0:1]

y_patient = patient_row[target].iloc[0]
print(y_patient)  # Print whether the patient was hospitalized

# Convert patient to a dict similar to the format of the JSON in the API
patient_dict = patient_row.drop(target, axis=1).to_dict(orient="records")[0]
print(patient_dict)

1.0
{'pahip50e': 1.0, 'padadage': nan, 'pamomage': 80.0, 'pahibpe': 1.0, 'parespe': 0.0, 'pacholst': 0.0, 'pagrossaa': 0.0, 'paosleep': nan, 'pafallinj': 0.0, 'pacancre': 0.0, 'paswell': 1.0, 'parafaany': nan, 'pawheeze': 0.0, 'paagey': 55.0, 'paarthre': 0.0, 'padiabe': 0.0, 'parjudg': nan, 'paweight': 54.0, 'paclims': 0.0, 'pastroke': 0.0, 'pasmokev': 0.0, 'pahearaid': 0.0, 'pameds': 0.0, 'paheight': nan, 'paglasses': 0.0, 'paeat': nan, 'papaina': nan, 'pagender': 2.0, 'padrinkb': 0.0}


In [14]:
# Show the true label next to the model's (decision, probability) output
y_patient, predict_patient(
    patient_dict=patient_dict,
    transformers=transformers_imported,
    model=model_imported,
)

(1.0, (True, 0.68))

### 5.2.1 Working with json format

In [15]:
def clean_patient_dict(patient_dict):
    """
    Build a JSON-friendly copy of a patient dictionary.

    The input dictionary is not modified. In the returned copy:
    - numpy arrays are converted to (nested) Python lists,
    - numpy scalars (e.g. np.float32, np.int64) are converted to the
      equivalent native Python value,
    - float NaN values are replaced by the empty string "".

    Parameters:
    patient_dict (dict): Mapping of feature name to value.

    Returns:
    dict: A new dictionary with the same keys and cleaned values.
    """
    patient_clean = {}
    for key, value in patient_dict.items():
        if isinstance(value, np.ndarray):
            # Convert ndarray to list for JSON compatibility
            value = value.tolist()
        elif isinstance(value, np.generic):
            # numpy scalars such as np.float32 / np.int64 are not handled by the
            # json module; .item() yields the native Python equivalent.
            value = value.item()

        if isinstance(value, float) and np.isnan(value):
            # Replace NaN with "" so the output is valid JSON
            value = ""

        patient_clean[key] = value

    return patient_clean

In [16]:
# Produce a JSON-compatible copy of the sample patient
patient_clean = clean_patient_dict(patient_dict=patient_dict)

In [17]:
import json

# Write the cleaned patient record to "<json_filename>.json"
json_filename = "rfc-single_patient"
with open(f"{json_filename}.json", "w") as fp:
    fp.write(json.dumps(patient_clean))

### Test using json

In [18]:
# Read the patient record back from the JSON file written earlier
input_file = "rfc-single_patient.json"

with open(input_file, "rb") as f_in:
    patient_json = json.loads(f_in.read())

print(patient_json)

{'pahip50e': 1.0, 'padadage': '', 'pamomage': 80.0, 'pahibpe': 1.0, 'parespe': 0.0, 'pacholst': 0.0, 'pagrossaa': 0.0, 'paosleep': '', 'pafallinj': 0.0, 'pacancre': 0.0, 'paswell': 1.0, 'parafaany': '', 'pawheeze': 0.0, 'paagey': 55.0, 'paarthre': 0.0, 'padiabe': 0.0, 'parjudg': '', 'paweight': 54.0, 'paclims': 0.0, 'pastroke': 0.0, 'pasmokev': 0.0, 'pahearaid': 0.0, 'pameds': 0.0, 'paheight': '', 'paglasses': 0.0, 'paeat': '', 'papaina': '', 'pagender': 2.0, 'padrinkb': 0.0}


In [19]:
def revive_nan(data):
    """
    Recursively replaces empty strings with NaN values in a nested dictionary or list.

    Dictionaries and lists are modified in place and the same object is
    returned, so the function can be used either for its side effect or for
    its return value.

    Parameters:
    data (dict, list or scalar): The input data to be processed.

    Returns:
    dict, list or scalar: The processed data. Containers are returned after
        in-place processing; an empty string becomes ``np.nan``; any other
        value is returned unchanged.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            # Re-assigning existing keys does not change the dict size, so it
            # is safe while iterating.
            data[key] = revive_nan(value)
        # Return the container so that nested dicts are not overwritten with None
        return data

    if isinstance(data, list):
        for i, value in enumerate(data):
            data[i] = revive_nan(value)
        # Return the container so that nested lists are not overwritten with None
        return data

    # Only a real empty string marks a missing value; the type check keeps the
    # comparison from being evaluated on arbitrary objects.
    if isinstance(data, str) and data == "":
        return np.nan

    return data

In [20]:
# revive_nan rewrites the entries of patient_json in place; its return value is
# deliberately not used here. The second line displays the updated dictionary.
revive_nan(patient_json)
patient_json

{'pahip50e': 1.0,
 'padadage': nan,
 'pamomage': 80.0,
 'pahibpe': 1.0,
 'parespe': 0.0,
 'pacholst': 0.0,
 'pagrossaa': 0.0,
 'paosleep': nan,
 'pafallinj': 0.0,
 'pacancre': 0.0,
 'paswell': 1.0,
 'parafaany': nan,
 'pawheeze': 0.0,
 'paagey': 55.0,
 'paarthre': 0.0,
 'padiabe': 0.0,
 'parjudg': nan,
 'paweight': 54.0,
 'paclims': 0.0,
 'pastroke': 0.0,
 'pasmokev': 0.0,
 'pahearaid': 0.0,
 'pameds': 0.0,
 'paheight': nan,
 'paglasses': 0.0,
 'paeat': nan,
 'papaina': nan,
 'pagender': 2.0,
 'padrinkb': 0.0}

In [21]:
# Same comparison as before, now using the patient record read back from JSON
y_patient, predict_patient(
    patient_dict=patient_json,
    transformers=transformers_imported,
    model=model_imported,
)

(1.0, (True, 0.68))