In [1]:
import pickle

import numpy as np
import pandas as pd
from xgboost import XGBClassifier


In [2]:
# One row per model input: (DataFrame column name, short name).
# Both public lists below are derived from this table, in the same order.
_feature_labels = (
    ('age', 'Age'),
    ('diag_Urinary tract infection, site not specified', 'UTI'),
    ('diag_F10', 'Alcohol'),
    ('diag_Type 2 diabetes mellitus without (mention of) complications', 'Type 2 Diabetes'),
    ('lab_Sodium (Moles/volume) in Serum or Plasma', 'Sodium (Moles/volume)'),
    ('lab_Bilirubin.total (Moles/volume) in Serum or Plasma', 'Bilirubin (Moles/volume)'),
    ('diag_Gastrointestinal haemorrhage, unspecified', 'GI bleed'),
    ('diag_Acute pancreatitis, unspecified', 'Acute Pancreatitis'),
    ('diag_Tendency to fall, not elsewhere classified', 'Falls'),
    ('diag_Hepatic failure, unspecified', 'Hepatic Failure'),
    ('diag_Schizophrenia, unspecified', 'Schizophrenia'),
    ('diag_Cachexia', 'Cachexia'),
    ('diag_Other and unspecified convulsions', 'Convulsions'),
    ('diag_Epilepsy, unspecified, not stated as intractable', 'Epilepsy'),
    ('diag_Dyspnoea', 'Dyspnoea'),
)

# Full column names, in the order the model is trained on.
features = [column for column, _label in _feature_labels]

# Short names, index-aligned with `features`.
short_names = [label for _column, label in _feature_labels]

# Build a purely synthetic dataset and fit an XGBoost classifier on it.
# Every value, including the target, is an independent random draw.
SEED = 42  # fixed so a fresh-kernel run reproduces the same data and model
n_samples = 1000
rng = np.random.default_rng(SEED)

# age: integers in [18, 113); the two lab values: uniform floats.
synthetic = {
    'age': rng.integers(18, 113, size=n_samples),
    'lab_Sodium (Moles/volume) in Serum or Plasma': rng.uniform(100, 200, size=n_samples),
    'lab_Bilirubin.total (Moles/volume) in Serum or Plasma': rng.uniform(0.1, 2, size=n_samples),
}

# Every diag_* feature is a 0/1 flag.
for col in features:
    if col.startswith('diag_'):
        synthetic[col] = rng.integers(0, 2, size=n_samples)

# Selecting by `features` fixes the column order and raises KeyError if a
# listed feature has no generator above, instead of leaving an all-NaN column.
df = pd.DataFrame(synthetic)[features]

X = df[features]
y = rng.integers(0, 2, size=n_samples)  # random binary target

clf = XGBClassifier(random_state=SEED)
clf.fit(X, y)


# Serialize the fitted model and both name lists to disk.
# Paths are relative, so each file lands in the current working directory.
artifacts = {
    "xgb_model.pkl": clf,
    "feature_list.pkl": features,
    "short_names.pkl": short_names,
}

for filename, obj in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(obj, f)


In [3]:
# Display the dtype pandas assigned to each column of df.
df.dtypes

age                                                                   int64
diag_Urinary tract infection, site not specified                      int64
diag_F10                                                              int64
diag_Type 2 diabetes mellitus without (mention of) complications      int64
lab_Sodium (Moles/volume) in Serum or Plasma                        float64
lab_Bilirubin.total (Moles/volume) in Serum or Plasma               float64
diag_Gastrointestinal haemorrhage, unspecified                        int64
diag_Acute pancreatitis, unspecified                                  int64
diag_Tendency to fall, not elsewhere classified                       int64
diag_Hepatic failure, unspecified                                     int64
diag_Schizophrenia, unspecified                                       int64
diag_Cachexia                                                         int64
diag_Other and unspecified convulsions                                int64
diag_Epileps