# Periodontal Health and Falls

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

---

## load merge of oral health and dizziness, falls, and hospitaliztion data

In [2]:
df = pd.read_csv('../data/2003-2004/output/patient_tooth_cal_measurments2.csv')
df.shape

(5303, 19)

In [3]:
df.head()

Unnamed: 0,VAR1,SEQN,RIDAGEYR,RIAGENDR,WTINT2YR,WTMEC2YR,SDMVPSU,SDMVSTRA,num_teeth,num_teeth_gte_3,pct_teeth_gte_3,num_teeth_gte_4,pct_teeth_gte_4,num_teeth_gte_5,pct_teeth_gte_5,BAQ020A,BAQ020B,BAQ020C,HUQ071
0,0,21005,19,Male,5512.320949,5824.782465,2,39,28,0,0.0,0,0.0,0,0.0,,,,2
1,1,21009,55,Male,97593.67898,97731.72724,2,31,24,0,0.0,0,0.0,0,0.0,,,,2
2,2,21010,52,Female,39599.36269,43286.57647,1,29,19,4,0.21,3,0.16,1,0.05,2.0,2.0,1.0,2
3,3,21012,63,Male,12629.44048,12947.33814,2,33,19,10,0.53,10,0.53,7,0.37,2.0,1.0,2.0,2
4,4,21015,83,Male,17625.87573,19994.15986,2,33,25,0,0.0,0,0.0,0,0.0,2.0,1.0,1.0,2


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5303 entries, 0 to 5302
Data columns (total 19 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   VAR1             5303 non-null   int64  
 1   SEQN             5303 non-null   int64  
 2   RIDAGEYR         5303 non-null   int64  
 3   RIAGENDR         5303 non-null   object 
 4   WTINT2YR         5303 non-null   float64
 5   WTMEC2YR         5303 non-null   float64
 6   SDMVPSU          5303 non-null   int64  
 7   SDMVSTRA         5303 non-null   int64  
 8   num_teeth        5303 non-null   int64  
 9   num_teeth_gte_3  5303 non-null   int64  
 10  pct_teeth_gte_3  5303 non-null   float64
 11  num_teeth_gte_4  5303 non-null   int64  
 12  pct_teeth_gte_4  5303 non-null   float64
 13  num_teeth_gte_5  5303 non-null   int64  
 14  pct_teeth_gte_5  5303 non-null   float64
 15  BAQ020A          861 non-null    float64
 16  BAQ020B          861 non-null    float64
 17  BAQ020C       

## plot results

**help function for determining age groups**

In [5]:
def get_age_group(age):
    age_group = ''
    if age >= 18 and age <= 25:
        age_group = '18-25'
    elif age >= 26 and age <= 35:
        age_group = '26-35'
    elif age >= 36 and age <= 45:
        age_group = '36-45'
    elif age >= 46 and age <= 55:
        age_group = '46-55'
    elif age >= 56 and age <= 65:
        age_group = '56-65'
    elif age >= 66 and age <= 75:
        age_group = '66-75'
    else:
        age_group = '76+'
    
    return age_group

def get_percent_group(percent):
    percent_group = ''
    
    if percent >= 0.0 and  percent <=0.10:
        percent_group = '0-10'
    elif percent >=0.11 and percent <= 0.20:
        percent_group = '11-20'
    elif percent > 0.21 and percent <= 0.30:
        percent_group = '21-30'
    elif percent >= 0.31 and percent <=0.40:
        percent_group = '31-40'
    elif percent >= 0.41 and percent <=0.50:
        percent_group = '41-50'
    elif percent >= 0.51 and percent <=0.60:
        percent_group = '51-60'
    elif percent >= 0.61 and percent <=0.70:
        percent_group = '61-70'
    elif percent >= 0.71 and percent <=0.80:
        percent_group = '71-80'
    elif percent >= 0.81 and percent <=0.90:
        percent_group = '81-90'
    else:
        percent_group = '90+'

    return percent_group

### default figure settings

In [6]:
%matplotlib inline
sns.set_theme(
    context='notebook', style='darkgrid', palette='deep', font='sans-serif', font_scale=1, color_codes=True, rc=None
)

## standardize variables  

BAQ020A       Dizziness problems in past year  
BAQ020B       Balance problems in past year  
BAQ020C       Falling problems in past year  
HUQ071        Overnight hospital patient in last year  

Set 1=yes, (not 1)=0  
Note: value counts where checked before doing this. There was a handfule of value 9="I don't know", but setting those to 0.

BAQ020A had 0 values of 9.  
BAQ020C had 1 values of 9.  
BAQ020C had 0 values of 9.  
HUQ071 had 4 values of 9.  

In [7]:
plot_df = df.copy() # copy original dataframe

In [8]:
plot_df.BAQ020A = np.where(plot_df.BAQ020A==1, 1, 0)
plot_df.BAQ020B = np.where(plot_df.BAQ020B==1, 1, 0)
plot_df.BAQ020C = np.where(plot_df.BAQ020C==1, 1, 0)
plot_df.HUQ071 = np.where(plot_df.HUQ071==1, 1, 0)

In [9]:
plot_df = plot_df.rename(
    columns={
        'RIDAGEYR':'age', 
        'RIAGENDR': 'gender', 
        'num_teeth_gte_3':
        '# CAL >= 3', 
        'pct_teeth_gte_3': 
        '% CAL >= 3',
        'num_teeth_gte_4':
        '# CAL >= 4', 
        'pct_teeth_gte_4': 
        '% CAL >= 4',
        'num_teeth_gte_5':
        '# CAL >= 5', 
        'pct_teeth_gte_5': 
        '% CAL >= 5',
        'BAQ020A': 'dizziness past year',
        'BAQ020B': 'balance past year',
        'BAQ020C': 'falling past year',
        'HUQ071': 'hospital past year'
    }
)
plot_df['age group'] = plot_df['age'].map(get_age_group)
plot_df['pct CAL >= 3'] = plot_df['% CAL >= 3'].map(get_percent_group)
plot_df['pct CAL >= 4'] = plot_df['% CAL >= 4'].map(get_percent_group)
plot_df['pct CAL >= 5'] = plot_df['% CAL >= 5'].map(get_percent_group)

In [10]:
plot_df['pct CAL >= 4'].value_counts()

pct CAL >= 4
0-10     4923
11-20     174
21-30      77
31-40      65
41-50      39
90+        13
51-60      11
61-70       1
Name: count, dtype: int64

## explore data

In [11]:
plt.figure(figsize=(8,3))

# age_order_ = ['76+', '66-75', '56-65', '46-55', '36-45', '26-35', '18-25'] 
# age_order_ = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['36-45','46-55','56-65', '66-75', '76+']

sns.barplot(data=plot_df[plot_df.age > 35], x='age group', y='falling past year', order=order_, errorbar=None, hue='gender', estimator='mean')
plt.show()

NameError: name 'order_' is not defined

<Figure size 800x300 with 0 Axes>

In [None]:
plt.figure(figsize=(8,3))
pct_order = ['0-10','11-20','21-30','31-40','41-50','51-60','61-70','71-80','81-90','90+']

sns.barplot(
    data=plot_df[plot_df.age > 35], 
    x='pct CAL >= 3', y='falling past year', 
    order=pct_order, 
    errorbar=None, 
    hue='gender', 
    estimator='sum'
)
plt.show()

In [None]:
plt.figure(figsize=(8,3))

# age_order_ = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['36-45','46-55','56-65', '66-75', '76+']

sns.barplot(data=plot_df[plot_df.age > 35], x='age group', y='dizziness past year', order=age_order, errorbar=None, hue='gender', estimator='mean')
plt.show()

In [None]:
plt.figure(figsize=(8,3))

# age_order_ = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['36-45','46-55','56-65', '66-75', '76+']

sns.barplot(data=plot_df[plot_df.age > 35], x='age group', y='balance past year', order=age_order, errorbar=None, hue='gender', estimator='mean')
plt.show()

In [None]:
plt.figure(figsize=(8,3))

# age_order_ = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['36-45','46-55','56-65', '66-75', '76+']

sns.barplot(data=plot_df[plot_df.age > 35], x='age group', y='hospital past year', order=age_order, errorbar=None, hue='gender', estimator='mean')
plt.show()

In [None]:
plt.figure(figsize=(8,3))

sns.barplot(data=plot_df, y='% CAL >= 3', x='falling past year', errorbar=None, hue='gender', estimator='mean')
plt.show()

In [None]:
# age_order = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['46-55','56-65', '66-75', '76+']


fig, axs = plt.subplots(1, 3, figsize=(14, 3), sharex=True, sharey=True, tight_layout=True)

sns.barplot(data=plot_df, y='% CAL >= 3', x='falling past year', errorbar=None, hue='gender', estimator='mean', ax=axs[0])
axs[0].set_title('% Teeth with CAL >= 3')
axs[0].legend().set_visible(False)

sns.barplot(data=plot_df, y='% CAL >= 4', x='falling past year', errorbar=None, hue='gender', estimator='mean', ax=axs[1])
axs[1].set_title('% Teeth with CAL >= 4')

sns.barplot(data=plot_df, y='% CAL >= 5', x='falling past year', errorbar=None, hue='gender', estimator='mean', ax=axs[2])
axs[2].set_title('% Teeth with CAL >= 5')
axs[2].legend().set_visible(False)

axs[0].set_ylabel('percent')

plt.savefig('../figures/falling-past-year-by-percent-CAL.png', dpi=600)
plt.show()

In [None]:
# age_order = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['46-55','56-65', '66-75', '76+']
temp_df = plot_df[plot_df['age group'].isin(age_order)]

fig, axs = plt.subplots(1, 3, figsize=(14, 3), sharex=True, sharey=True, tight_layout=True)

sns.barplot(
    data=temp_df, y='% CAL >= 3', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[0], hue_order=age_order
)
axs[0].set_title('% Teeth with CAL >= 3')
axs[0].legend().set_visible(False)

sns.barplot(
    data=temp_df, y='% CAL >= 4', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[1], hue_order=age_order
)
axs[1].set_title('% Teeth with CAL >= 4')

sns.barplot(
    data=temp_df, y='% CAL >= 5', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[2], hue_order=age_order
)
axs[2].set_title('% Teeth with CAL >= 5')
axs[2].legend().set_visible(False)

axs[0].set_ylabel('percent')

plt.savefig('../figures/falling-past-year-by-percent-CAL-age-groups.png', dpi=600)
plt.show()

In [None]:
# Look at Males

# age_order = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['46-55','56-65', '66-75', '76+']
temp_df = plot_df[(plot_df['age group'].isin(age_order)) & (plot_df.gender=='Male')] # subset to Male

fig, axs = plt.subplots(1, 3, figsize=(14, 3), sharex=True, sharey=True, tight_layout=True)

sns.barplot(
    data=temp_df, y='% CAL >= 3', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[0], hue_order=age_order
)
axs[0].set_title('Males % Teeth with CAL >= 3')
axs[0].legend().set_visible(False)

sns.barplot(
    data=temp_df, y='% CAL >= 4', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[1], hue_order=age_order
)
axs[1].set_title('Males % Teeth with CAL >= 4')
axs[1].legend().set_visible(False)

sns.barplot(
    data=temp_df, y='% CAL >= 5', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[2], hue_order=age_order
)
axs[2].set_title('Males % Teeth with CAL >= 5')

axs[0].set_ylabel('percent')

plt.savefig('../figures/falling-past-year-by-percent-CAL-age-groups-males.png', dpi=600)
plt.show()

In [None]:
# Look at Females

# age_order = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['46-55','56-65', '66-75', '76+']
temp_df = plot_df[(plot_df['age group'].isin(age_order)) & (plot_df.gender=='Female')] # subset to Female

fig, axs = plt.subplots(1, 3, figsize=(14, 3), sharex=True, sharey=True, tight_layout=True)

sns.barplot(
    data=temp_df, y='% CAL >= 3', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[0], hue_order=age_order
)
axs[0].set_title('Females % Teeth with CAL >= 3')
axs[0].legend().set_visible(False)

sns.barplot(
    data=temp_df, y='% CAL >= 4', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[1], hue_order=age_order
)
axs[1].set_title('Females % Teeth with CAL >= 4')
axs[1].legend().set_visible(False)

sns.barplot(
    data=temp_df, y='% CAL >= 5', x='falling past year', errorbar=None, hue='age group', estimator='mean', ax=axs[2], hue_order=age_order
)
axs[2].set_title('Females % Teeth with CAL >= 5')
# axs[2].legend().set_visible(False)

axs[0].set_ylabel('percent')

plt.savefig('../figures/falling-past-year-by-percent-CAL-age-groups-females.png', dpi=600)
plt.show()

In [None]:
# age_order = ['76+', '66-75', '56-65', '46-55', '36-45', '26-35', '18-25'] 
# age_order = ['18-25', '26-35','36-45','46-55','56-65', '66-75', '76+']
age_order = ['46-55','56-65', '66-75', '76+']


fig, axs = plt.subplots(1, 3, figsize=(14, 3), sharex=True, sharey=True, tight_layout=True)

sns.barplot(
    data=plot_df[plot_df.age > 45], x='age group', y='% CAL >= 3', order=age_order, errorbar=None, hue='gender', estimator='mean', ax=axs[0]
)
axs[0].set_title('% Teeth with CAL >= 3')
axs[0].legend().set_visible(False)

sns.barplot(
    data=plot_df[plot_df.age > 45], x='age group', y='% CAL >= 4', order=age_order, errorbar=None, hue='gender', estimator='mean', ax=axs[1]
)
axs[1].set_title('% Teeth with CAL >= 4')

sns.barplot(
    data=plot_df[plot_df.age > 45], x='age group', y='% CAL >= 5', order=age_order, errorbar=None, hue='gender', estimator='mean', ax=axs[2]
)
axs[2].set_title('% Teeth with CAL >= 5')
axs[2].legend().set_visible(False)

axs[0].set_xlabel('age group')
axs[1].set_xlabel('age group')
axs[2].set_xlabel('age group')

axs[0].set_ylabel('percent')

plt.show()

In [None]:
glue = sns.load_dataset("glue")

In [None]:
glue.head()

In [None]:
glue.pivot(index="Model", columns="Task", values="Score")

In [None]:
plot_df.melt(id_vars=['pct CAL >= 3','pct CAL >= 4','pct CAL >= 5'], value_vars=['% CAL >= 3', '% CAL >= 4', '% CAL >= 5'])

In [None]:
# plot_df.pivot_table(index=['pct CAL >= 3','pct CAL >= 4','pct CAL >= 5'], values=['% CAL >= 3', '% CAL >= 4', '% CAL >= 5'])

In [None]:
# temp_df = [plot_df[['pct CAL >= 3', '% CAL >= 3']].head(), plot_df[['pct CAL >= 4', '% CAL >= 4']].head()]
temp_df = [
    plot_df[['pct CAL >= 3', 'falling past year']].head().rename(columns={'pct CAL >= 3':'pct CAL'}),
    plot_df[['pct CAL >= 4', 'falling past year']].head().rename(columns={'pct CAL >= 4':'pct CAL'})
]

In [None]:
pd.concat(temp_df, axis=0, ignore_index=False)

In [None]:
groups = (
    plot_df[['age group', 'pct CAL >= 3', 'falling past year']]
    .groupby(['age group', 'pct CAL >= 3'], as_index=False)
    ['falling past year']
    .sum()
)
temp_df = pd.DataFrame(groups)
temp_df

In [None]:
temp_df2 = plot_df[['age group', 'pct CAL >= 3', 'falling past year']].head(100)
temp_df2[(temp_df2['age group'] == '66-75') & (temp_df2['falling past year'] > 0)]

In [None]:
temp_df.pivot(index="age group", columns="pct CAL >= 3", values="falling past year")

In [None]:
# pct_order = ['0-10','11-20','21-30','31-40','41-50','51-60','61-70','71-80','81-90','90+']
pct_order = ['0-10','11-20','21-30','31-40','41-50']
# pct_order = ['41-50','31-40','21-30','11-20','0-10']
age_order = ['36-45','46-55','56-65', '66-75', '76+']

pct_cal = 'pct CAL >= 5'
groups = (
    plot_df[['age group', pct_cal, 'falling past year']][plot_df['age group'].isin(age_order) & plot_df[pct_cal].isin(pct_order)]
    .groupby(['age group', pct_cal], as_index=False)
    ['falling past year']
    .sum()
)
temp_df = pd.DataFrame(groups)

pivot_df = temp_df.pivot(index=pct_cal, columns="age group", values="falling past year").fillna(0)
sns.heatmap(
    pivot_df,
    cmap="Spectral",
    annot=True,
    linewidth=.5
)

In [None]:
df.RIAGENDR.value_counts()