In [None]:
import os

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
import seaborn as sns

In [None]:
# Load the cohort from the database: the connection string is taken from the
# SQLALCHEMY_CONNECTION_STRING environment variable and the query text from
# the get_nicu_bmi.sql file next to the notebook.
import sqlalchemy

engine = sqlalchemy.create_engine(
    os.environ.get('SQLALCHEMY_CONNECTION_STRING')
)

with open('get_nicu_bmi.sql') as query_file:
    sql = query_file.read()

dat = pd.read_sql(sql, con=engine)

In [None]:
# Load the cohort from a CSV file. This rebinds `dat`, so it replaces whatever
# the SQL cell above loaded when both cells are run in order.
# NOTE(review): presumably an offline export of the same query -- confirm which
# of the two data sources is the intended one.
dat = pd.read_csv('get_nicu_admission_05_16.csv')

In [None]:
# Build the `bmi` column: take the recorded value (earliest_bmi) when there is
# one, otherwise fall back to computed_bmi.
bmi_not_measured = dat['earliest_bmi'].isna()
dat['bmi'] = np.where(bmi_not_measured, dat['computed_bmi'], dat['earliest_bmi'])

# Row count of the full, unfiltered frame.
print(len(dat))

In [None]:
# Keep only the rows where both the NICU outcome and the BMI are known.
has_outcome_and_bmi = dat['nicu_admission'].notna() & dat['bmi'].notna()
dat_selected = dat.loc[has_outcome_and_bmi].copy()

# The year is parsed on the full frame; the column assignment aligns on the
# index, so every retained row receives its own pregnancy-end year.
dat_selected['year'] = pd.to_datetime(dat['preg_end_date']).dt.year

len(dat_selected)

In [None]:
# Sanity check: fraction of missing values remaining in each of the two
# filter columns, reported as (bmi, nicu_admission).
tuple(dat_selected[column].isna().mean() for column in ('bmi', 'nicu_admission'))

In [None]:
def bmi_group(x):
    '''
      Convert a BMI value to its weight-category label.

      Parameters
      ----------
      x : number
          Body-mass index to classify.

      Returns
      -------
      str or None
          'underweight' (x < 18.5), 'healthy' (x < 25), 'overweight' (x < 30),
          'obese_1' (x < 35), 'obese_2' (x < 40) or 'obese_3' (x >= 40).
          None when x is missing (NaN / None / pd.NA).
    '''
    # Every comparison against NaN is False, so without this guard a missing
    # BMI would fall through all branches and be mislabelled 'obese_3'.
    if pd.isna(x):
        return None
    if x < 18.5:
        return 'underweight'
    elif x < 25:
        return 'healthy'
    elif x < 30:
        return 'overweight'
    elif x < 35:
        return 'obese_1'
    elif x < 40:
        return 'obese_2'
    else:
        return 'obese_3'

In [None]:
# Label every retained row with its BMI category by running the classifier
# over the `bmi` column element by element.
dat_selected['bmi_group'] = dat_selected['bmi'].map(bmi_group)

In [None]:
# Compare the NICU admission values of every BMI category against the
# 'healthy' reference group with a two-sample t-test, printing one summary
# line per category (the 'healthy' line compares the reference with itself).
group_ref = dat_selected[dat_selected['bmi_group'] == 'healthy'].nicu_admission.values

# The loop variable is deliberately not called `bmi_group`: that name is the
# classifier function, and rebinding it to a string here would break any later
# re-run of the cell that calls dat_selected['bmi'].apply(bmi_group).
for group_name in ['underweight','healthy','overweight','obese_1','obese_2','obese_3']:
    group_compare = dat_selected[dat_selected['bmi_group'] == group_name].nicu_admission.values
    p_val = ttest_ind(group_ref, group_compare).pvalue
    print(f"{group_name} count={len(group_compare)} nicu_count={group_compare.sum()} ratio={group_compare.mean()} p={p_val}")

In [None]:
# Binary obesity flag: 'yes' for BMI of 30 or more, 'no' otherwise.
is_obese = dat_selected['bmi'] >= 30
dat_selected['obesity'] = np.where(is_obese, 'yes', 'no')

In [None]:
# NICU admission share per year and obesity flag, used for the plot below.
# 2018 has only 2 records, so only years after 2018 are kept.
after_2018 = dat_selected[dat_selected['year'] > 2018]
tmp = (
    after_2018
    .groupby(['year', 'obesity'])['nicu_admission']
    .value_counts(normalize=True)   # share of each outcome within (year, obesity)
    .rename('percentage')
    .reset_index()
)
tmp

In [None]:
# Bar chart of the admitted share (nicu_admission == 1) per year, split by
# obesity flag.
admitted_share = tmp[tmp['nicu_admission'] == 1]
sns.catplot(
    admitted_share,
    x='year',
    y='percentage',
    hue='obesity',
    kind='bar',
)
plt.ylabel('NICU admission rate')

In [None]:
# Split at BMI 30 (>= 30 vs < 30) to compare with the paper.
group1 = dat_selected[dat_selected.bmi >= 30].nicu_admission
group2 = dat_selected[dat_selected.bmi < 30].nicu_admission

# Test the two groups built here. The earlier per-category loop leaves
# `group_ref` / `group_compare` bound to 'healthy' vs 'obese_3', which is a
# different comparison and must not be reused for this split.
print(ttest_ind(group1, group2).pvalue)

# Outcome counts, in the order (BMI < 30, BMI >= 30).
(group2.value_counts(), group1.value_counts())