In [48]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import math
import missingno as msno
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
#import dataset
CI_candidacy_df = pd.read_excel(r"/Users/williambray/Downloads/WUSM_CI_candidacy_dataset.xlsx") 
CI_candidacy_df_final = prepare_dataset(CI_candidacy_df)

In [51]:
#Export to XLM
CI_candidacy_df_final.to_excel('CI_Candidacy_List_merge(10).xlsx', index=True)


In [10]:
##Table of Contents
def prepare_dataset(df):
    df=create_CI_candidate_col(df)
    df=create_CNC_candidacy_col(df)
    df=create_Medicare_candidacy_col(df)
    df=create_cols_HAcur(df)
    df=fill_na_prior_surg(df)
    df=select_cols(df)
    df=Fixerrors_CI_Dataset(df)
    df=CI_changes(df)
    df=year_replace(df)
    df=col_replace(df)
    df=number_cols(df)
    df=year_columns(df)
    df=impute_length_SPHL(df)
    df=year_columns(df)
    df=exclude_SSD(df)
    df=drop_age_SPHL(df)
    df=change_cols(df)
    df=standardize_col_names(df)
    df=HAyrs(df)
    df=create_cols_HAcur(df)

    #df=remove_empty_col(df)

    return df

In [30]:
#select columns
def select_cols(df):
#Age SPHL dropped elsewhere
    cols=[ 
    'MRN',	
    #'Patient Data::DOB',
    'Patient Data::Left Age of Onset',
    'Patient Data::Left Age Severe to Profound HL',
    #'Patient Data::Left Ear Comments',
    'Patient Data::Left Etiology',
    #'Patient Data::Left HA Comments',
    'Patient Data::Left Hearing Aid Use',
    'Patient Data::Left Length of SPHL',
    'Patient Data::Left Prior Ear Surgery',
    'Patient Data::Left Progression HL',
    'Patient Data::Left Tinnitus',
    'Patient Data::Left Years since No HA Use',
    'Patient Data::Right Age of Onset',
    'Patient Data::Right Age Severe to Profound HL',
    #'Patient Data::Right Ear Comments',
    'Patient Data::Right Etiology',
    #'Patient Data::Right HA Comments',
    'Patient Data::Right Hearing Aid Use',
    'Patient Data::Right Length of SPHL',
    'Patient Data::Right Prior Ear Surgery',
    'Patient Data::Right Progression HL',
    'Patient Data::Right Tinnitus',
    'Patient Data::Right Years since No HA use',
    'Age at CI Evaluation',
    'L 1000 Hz',
    'L 125 Hz',
    #'L 1500 Hz',
    'L 2000 Hz',
    'L 250 Hz',
    'L 3000 Hz',
    'L 4000 Hz',
    'L 500 Hz',
    'L 6000 Hz',
    #'L 750 Hz',
    'L 8000 Hz',
    'L PTA (500, 1K, 2K, 4K)',
    'L PTA (500, 1K, 2K)',
    'L WRS%',
    'Left Low Freq PTA (250, 500)',
    'Left Low Frequency PTA (125, 250, 500)',
    'R 1000 Hz',
    'R 125Hz',
    #'R 1500 Hz',
    'R 2000 Hz',
    'R 250 Hz',
    'R 3000 Hz',
    'R 4000 Hz',
    'R 500 Hz',
    'R 6000 Hz',
    #'R 750 Hz',
    'R 8000 Hz',
    'R PTA (500, 1K, 2K, 4K)',
    'R PTA (500, 1K, 2K)',
    'R WRS%',
    # 'Referral From:',
    'Right Low Freq PTA (250, 500)',
    'Right Low Frequency PTA (125, 250, 500)',
    'CNC Left',
    'CNC Right',
    'AzBio Left Ear',
    'AzBio Right Ear',
    'AzBio Binaural',
    'AzBio + 10dB SNR - Binaural',
    'AzBio + 10dB SNR - Left Ear',
    'AzBio + 10dB SNR - Right Ear',
    'AzBio +5dB SNR - Binaural',
    'AzBio +5dB SNR - Left',
    'AzBio +5dB SNR - Right',
    #'MMSE Score',
    'MOCA Score',
    'SSD Candidate?',
    # 'OTHER CI Candidacy'
    'CI candidate',
    'CNC candidate',
    'Medicare candidate'
    ]
    df=df[cols]
#add cols from Old CI Registry
    blank_col=[
         'NoiseExp',
         'NoiseExpYrs',
         'HearingAidUse',
         'Prelingual',
         'Vertigo',
         'Imbalance',
         'FamilyHistoryHL',
         'CNC_bi',
         'CanUseTele_L',
         'CanUseTele_R',
         'HAcur_L',
         'HAcur_R',
         'HAyrs_L',
         'HAyrs_R'
    ]
    df=df.reindex(columns=cols+blank_col)
    return df

In [50]:
##create columns
candidacy_cols=[
    'Age at CI Evaluation'
    'CNC Left',
    'CNC Right',
    'AzBio Binaural'
    'AzBio + 10dB SNR - Binaural']
#CI candidate=AZ Bio quiet or +10<60% binaural
def create_CI_candidate_col(df):
    #df['CI candidate']= df['AzBio + 10dB SNR - Binaural']<=60| (df['AzBio Binaural'] <= 60)
    df.loc[(df['AzBio + 10dB SNR - Binaural']<=60)|(df['AzBio Binaural']<= 60),'CI candidate']=1
    df.loc[(df['AzBio + 10dB SNR - Binaural']>60)|(df['AzBio Binaural'] > 60),'CI candidate']=0
    df.drop(df['CI candidate']==0)
    return df
#CNC candidacy- less than 40% (worst of either ear)
def create_CNC_candidacy_col(df):
    #df['CNC candidate']=df['CNC Left']<=40|(df['CNC Right']<=40)
    df.loc[(df['CNC Left']<=40)&(df['CNC Right']<= 40),'CNC candidate']=1
    df.loc[(df['CNC Left']>40)&(df['CNC Right']>40),'CNC candidate']=0
    df.drop(df['CNC candidate']==0)
    return df
# medicare candidate= over 65 less than 40% (az quiet or +10) binaural
def create_Medicare_candidacy_col(df):
    df['Medicare candidate'] = (((df['AzBio + 10dB SNR - Binaural'] <= 40) | (df['AzBio Binaural'] <= 40)) & (df['Age at CI Evaluation'] >= 65))
    return df

In [32]:
#remove SSD candidates
def exclude_SSD(df):
    df['SSD Candidate?'].fillna('No', inplace=True)
    df= df.loc[df['SSD Candidate?'] != 'Yes']
    return df

In [33]:
###general changes
#remove NA values
def Fixerrors_CI_Dataset(df):
    nadict = {'na':np.nan,'NT':np.nan,'not tested':np.nan,'DNT':np.nan,'NA':np.nan,'dnt':np.nan, ' NA':np.nan, 'nan':np.nan, 'nA':np.nan, 'NA ':np.nan,'no info':np.nan, 'NO':'No', 'YES':'Yes','MA':np.nan,'nan':np.nan,'NA\n':np.nan}
    df = df.replace(nadict)
    return df

#CI specific changes
def CI_changes(df):
    CI_specific_changes = {'NotKnown':99,'Unknown':99,'unsure':99,'unknown, see TELEPHONE use note':99,'unknown; at least several years':99,'125':120,'CNT':0,'cnt':0,'NR':120,'NR125':120,'N':np.nan,'nr':120,'unknown':99,'unknown ':99,'unsure':99}
    df= df.replace(CI_specific_changes)
    return df
    
def year_replace(df):
    years_or_monthsdict={'4 yrs':4,'10yrs':10,'3 years':3,'At Birth':0,'at least 20':20,'childhood': 10,'8 months':0.7,
                         '70s':70,'35yrs':35,'35 years':35,'60s':60,'58 years':58,'identified at 58 ':58,'possibly congenital, less than 3':0,
                         '2 months':0.16,'10-12yrs':11,'1 month':1,'3 months':0.25,'6 months':0.5,'4 months':0.3,'< 1 yr':1,'1yr':1,
                         '1-2 years':1.5,'45 years':45,'early 20s':20,'3-5 yrs':4,'Birth/congenital':0,'Birth/Congenital':0,'Birth':0,
                         'likely congenital':0,'At least 60':60,'childhood; diagnosed ~12 yrs old':12,'unknown, childhood':10,
                         'birth':0,'35 years':35,'10 years':10,'5 or 6 yrs of age':5.5,'At Birth':0,'birth, possibly; Dx at age 4':0,
                         'early 20s':20,'Likely since birth':0,'likely congenital, confirmed at 3yo':0,'childhood; diagnosed ~12 yrs old':12,
                         'unsure, possibly late 40s':47,'20s':25,'18 months':1.5,'1.5 years':1.5,'9 months':0.75, '12-15 years':13.5, 
                         'unknown, maybe 10':10,'identified at 58':58, '10 months':0.8,'72 years':72,'4 years':4,'6 months':0.5, '4 yrs':4,
                         'HL stable for about 1 year:':1,'30 years': 30,'5 yrs':5,'5 years':5,'possibly 46 years':46,'2 years':2,'384':32,
                         '2 years':2,'never worn':np.nan,'unknown, maybe 10':10,'unknown, childhood':10,'1 year':1,'2-3':2.5,'Possibly in childhood':10}
    df=df.replace(years_or_monthsdict)
    return df
#childhood unknowns set to 10

In [34]:
#column-specific_replacements
###make unknown its own category, repl with mean...?
#change year values in excel if 20-- etc...
#note:years in 'Age at CI Evaluation'indicate empty rows and for now can be set as np.nan
#high numbers in Years since no HA use assumed to be months, says (months) on reports sometimes, so converted to yrs
def col_replace(df):
    replace={
    'Patient Data::Left Age of Onset': {'Congenital':0,'Not severe to profound':0,'Slight hearing loss only':np.nan,'childhood':10,'Birth/Congenital':0},
    'Patient Data::Left Age Severe to Profound HL':{'Not severe to profound':0,'Slight hearing loss only':np.nan,'identified at 58':58,
                                                    '7075':np.nan,'67, patient has moderate to severe HL':67},
    'Patient Data::Left Years since No HA Use': {'Never worn':0,'still wears':0,'never worn':np.nan,'2004':16, '120':10, '240':20, 
                                                 '< 12':12,'10-35yrs':25,'300':25,'180':15},
    'Patient Data::Left Length of SPHL':{'HL stable for about 1 year':1,'<1':0},
    'Patient Data::Right Age of Onset':{'Congenital':0,'possibly congenital, less than 3':0, 'possibly 46 years':46,'40s':40},
    'Patient Data::Right Age Severe to Profound HL':{'2005':54,'confirmed at age 10, likely at onset (2-3yo)':2.5, 
                                                     '67, patient has moderate to severe HL':67}, 
    'Patient Data::Right Length of SPHL':{'possibly 46 years':46},
    'Patient Data::Right Years since No HA use':{'never':np.nan,'360':30,'144':12, '984':82, '120':10, '126':10.5, 
                                                '240':20, '288':24,'13 since used CI':np.nan, '300':25,'180':15,'never worn':np.nan},
    'Age at CI Evaluation':{'-1757':np.nan, '2020':np.nan, '2021':np.nan, '-11':np.nan,'-17':np.nan,'-31':np.nan, '-8':np.nan, '2022':np.nan},
    'L 250 Hz':{'65 VT':120,'45]0':45},
    'L 500 Hz':{'90 VT':120},
    'R 1000 Hz':{'6570':65},
    'R 125Hz':{'12585':120},
    'R 250 Hz':{'9090':90},
    'R 6000 Hz':{'NR':120,'nr':120,'na':np.nan,'NT':np.nan},
    'R 8000 Hz':{'110 NR':120,'8-':80},
    'R PTA (500, 1K, 2K, 4K)':{'1693.75':np.nan},
    'R WRS%':{'4%':4,'1/10':10},
    'L WRS%':{'3/10':30,'0%':0},
    'MOCA Score':{2830:28,2430:24,1630:16,2630:26,2030:20,2530:25,2130:21}
}
    for cols in replace:
        for repl in replace[cols]:
            df.loc[df[cols]==repl, cols] = replace[cols][repl]
    return df

def number_cols(df):
    num_col=[
    'Patient Data::Left Age of Onset',
    'Patient Data::Left Age Severe to Profound HL',
    'Patient Data::Left Length of SPHL',
    'Patient Data::Left Years since No HA Use',
    'Patient Data::Right Years since No HA use',
    'Patient Data::Right Age of Onset',
    'Patient Data::Right Age Severe to Profound HL',
    'Patient Data::Right Length of SPHL',
    'Age at CI Evaluation',
    'L 1000 Hz',
    'L 125 Hz',
    #'L 1500 Hz',
    'L 2000 Hz',
    'L 250 Hz',
    'L 3000 Hz',
    'L 4000 Hz',
    'L 500 Hz',
    'L 6000 Hz',
    #'L 750 Hz',
    'L 8000 Hz',
    'L PTA (500, 1K, 2K, 4K)',
    'L PTA (500, 1K, 2K)',
    'L WRS%',
    'Left Low Freq PTA (250, 500)',
    'Left Low Frequency PTA (125, 250, 500)',
    'R 1000 Hz',
    'R 125Hz',
    #'R 1500 Hz',
    'R 2000 Hz',
    'R 250 Hz',
    'R 3000 Hz',
    'R 4000 Hz',
    'R 500 Hz',
    'R 6000 Hz',
    #'R 750 Hz',
    'R 8000 Hz',
    'R PTA (500, 1K, 2K, 4K)',
    'R PTA (500, 1K, 2K)',
    'R WRS%',
    'Right Low Freq PTA (250, 500)',
    'Right Low Frequency PTA (125, 250, 500)',
    'CNC Left',	
    'CNC Right',
    'AzBio Left Ear',
    'AzBio Right Ear',
    'AzBio Binaural',
    'AzBio + 10dB SNR - Binaural',
    'AzBio + 10dB SNR - Left Ear',
    'AzBio + 10dB SNR - Right Ear',	
    'AzBio +5dB SNR - Binaural',
    'AzBio +5dB SNR - Left',
    'AzBio +5dB SNR - Right',
    #'MMSE Score',
    'MOCA Score'
    ]
    for cols in num_col:
        df[num_col]=df[num_col].astype(float)
    return df

##removing outliers in years
#acceptable year range    
#all columns with years 
def year_columns(df):
    yr_col={
    'Patient Data::Left Age of Onset',
    'Patient Data::Left Age Severe to Profound HL',
    #'Patient Data::Left Years since No HA Use',
    'Patient Data::Left Length of SPHL',
    'Patient Data::Right Age of Onset',
    'Patient Data::Right Age Severe to Profound HL',
    #'Patient Data::Right Years since No HA use',
    'Patient Data::Right Length of SPHL',
    'Age at CI Evaluation'
    } 
    for index, row in df.iterrows():
        for col in yr_col:
            value = row[col]
            if not (0 <= value <= 100):
                df.at[index, col] = np.nan
    
    return df


CI_candidacy_df_final = prepare_dataset(CI_candidacy_df)






NameError: name 'impute_length_SPHL' is not defined

In [35]:
#convert to numbers
def n_obj_col:
obj_col_replace=[
    #ototoxic medication/radiation listed same as 'radiation'
    'Patient Data::Left Etiology':{'Unknown':,'Acoustic Neuroma':,'Presbycusis':,'Meningitis':, "Meniere's Disease":, 'Noise Exposure':5, 'Autoimmune':6,'Congenital Progressive':7, 'Congenital':8, 'Genetic':9,'Ototoxic Medication / Radiation':10, 'Sudden Hearing Loss':11,'Syndrome':12, 'Otosclerosis':13, 'Acoustic Trauma':14, 'Radiation':10,'Congenital Prematurity':15,'Other':16},
    'Patient Data::Left Hearing Aid Use':{},
    'Patient Data::Left Prior Ear Surgery':{},
    'Patient Data::Left Progression HL':{},
    'Patient Data::Left Tinnitus':{},
    'Patient Data::Right Etiology':{},
    'Patient Data::Right Hearing Aid Use':{},
    'Patient Data::Right Prior Ear Surgery':{},
    'Patient Data::Right Progression HL':{},
    'Patient Data::Right Tinnitus':{},
    'SSD Candidate?':{},
]

  

SyntaxError: expected '(' (4048367772.py, line 2)

In [36]:
#remove % sign
def remove_percent(df):
    df=df.applymap(lambda x: x.str.strip('%'))
    return df

In [37]:
#use:
# ranges=[0,20,50,np.inf]
#group_names:[0-20,20-5-,50+]
#demographics['income_group']=pd.cut(demographics['household_income'],bins=ranges,labels=group_names)
#gives new column with row values corresponding to the categories the household income corresponds to 
#functions: pandas.cut(), pandas.qcut(), .replace()


In [38]:
def None_to_one(df):
    df.loc[df['Patient Data::Left Prior Ear Surgery']=='none','Patient Data::Left Prior Ear Surgery']= 0
    df.loc[df['Patient Data::Right Prior Ear Surgery']=='none','Patient Data::Right Prior Ear Surgery']= 0
    return df 
msno.matrix(CI_candidacy_df_final, labels=True)
plt.show()


NameError: name 'CI_candidacy_df_final' is not defined

In [39]:
#imput length SPHL
def impute_length_SPHL(df):
    condition_left = (df['Age at CI Evaluation'].notna()) & (df['Patient Data::Left Age Severe to Profound HL'].notna())
    mask_left = df['Patient Data::Left Length of SPHL'].isna()
    df.loc[condition_left & mask_left, 'Patient Data::Left Length of SPHL'] = df['Age at CI Evaluation'] - df['Patient Data::Left Age Severe to Profound HL']
    condition_right = (df['Age at CI Evaluation'].notna()) & (df['Patient Data::Right Age Severe to Profound HL'].notna())
    mask_right = df['Patient Data::Right Length of SPHL'].isna()
    df.loc[condition_right & mask_right, 'Patient Data::Right Length of SPHL'] = df['Age at CI Evaluation'] - df['Patient Data::Right Age Severe to Profound HL']
    return df
    
    

In [40]:
#fill na of prior surgery
def fill_na_prior_surg(df):
    df['Patient Data::Right Prior Ear Surgery'].fillna('None',inplace=True)
    df['Patient Data::Left Prior Ear Surgery'].fillna('None',inplace=True)
    return df 

In [41]:
def remove_empty_col(df):
    df.dropna(how='any', inplace=True)
    return df

In [42]:
#create col to match other data, indicate if currently using HA (years since no HA >0 means HA cur=0)
def create_cols_HAcur(df):
    df['HAcur_L'] = np.where(df['Patient Data::Left Hearing Aid Use'].notna()&
        ((df['Patient Data::Left Years since No HA Use'] == 0) | 
        (pd.isna(df['Patient Data::Left Years since No HA Use']) & (df['Patient Data::Left Hearing Aid Use'] == 'Yes'))),1,0)
    df['HAcur_R'] = np.where(df['Patient Data::Right Hearing Aid Use'].notna()&
        ((df['Patient Data::Right Years since No HA use'] == 0) | 
        (pd.isna(df['Patient Data::Right Years since No HA use']) & (df['Patient Data::Right Hearing Aid Use'] == 'Yes'))),1,0)
    return df

#     df['HAuse_L']=df.loc[df['Patient Data::Left Years since No HA Use']==0,'Patient Data::Left Years since No HA Use']=1
#     df.loc['Patient Data::Left Hearing Aid Use']=='Yes',]=df['HAuse_L'].fillna(1) 
#         df['HAuse_L'].fillna(1)
#     df['HAuse_L']=df.loc[df['Patient Data::Left Years since No HA Use']>0,'Patient Data::Left Years since No HA Use']=0
    
#     df['HAuse_R']=df.loc[df['Patient Data::Right Years since No HA Use']==0,'Patient Data::Right Years since No HA Use']=1
#     if df['Patient Data::Right Hearing Aid Use']=='Yes': 
#         df['HAuse_R'].fillna(1)
#     df['HAuse_R']=df.loc[df['Patient Data::Right Years since No HA Use']>0,'Patient Data::Right Years since No HA Use']=0
#     return df
            
    

In [43]:
def drop_age_SPHL(df):
    df=df.drop('Patient Data::Left Age Severe to Profound HL',axis=1)
    df=df.drop('Patient Data::Right Age Severe to Profound HL',axis=1)
    return df

In [44]:
def change_cols(df):
    #rid illogical age values (getting negatives from original df)
    df.loc[df['Age at CI Evaluation']<df['Patient Data::Left Age of Onset'],'Age at CI Evaluation']=99
    df.loc[df['Age at CI Evaluation']<df['Patient Data::Right Age of Onset'],'Age at CI Evaluation']=99
    #subtract Age at eval with onset to get HLdur
    condition_left=(df['Age at CI Evaluation'].notna())&(df['Age at CI Evaluation']!=99) & (df['Patient Data::Left Age of Onset'].notna())&(df['Patient Data::Left Age of Onset']!=99)
    df.loc[condition_left,'HLdur_L']=df['Age at CI Evaluation']-df['Patient Data::Left Age of Onset']
    condition_right=(df['Age at CI Evaluation'].notna())&(df['Age at CI Evaluation']!=99) & (df['Patient Data::Right Age of Onset'].notna())&(df['Patient Data::Right Age of Onset']!=99)
    df.loc[condition_right,'HLdur_R']=df['Age at CI Evaluation']-df['Patient Data::Right Age of Onset']
    return df
    

In [45]:
def standardize_col_names(df):
    st_coldict={
    #'Patient Data::Left Age of Onset':'HLdur_L',##########use to calcHLdur with age of onset and age
    #'Patient Data::Left Age Severe to Profound HL', delete
    'Patient Data::Left Etiology':'Etiology_L',
    #'Patient Data::Left Hearing Aid Use' #not same as HAcur(currently using HA, Y/N), use HAyrs and HA Use Time to get a yes or no on HA Use
    'Patient Data::Left Length of SPHL':'HLdurSP_L',
    'Patient Data::Left Prior Ear Surgery':'PriorES_L',
    'Patient Data::Left Progression HL':'HLprg_L',
    'Patient Data::Left Tinnitus':'Tinnitus_L',
    #'Patient Data::Right Age of Onset':'HLdur_R',##########use to calcHLdur with age of onset and age
    #'Patient Data::Right Age Severe to Profound HL',delete
    'Patient Data::Right Etiology':'Etiology_R',
    #'Patient Data::Right Hearing Aid Use':'HLdur_R',#not same as HA cur
    'Patient Data::Right Length of SPHL':'HLdurSP_R',
    'Patient Data::Right Prior Ear Surgery':'PriorES_R',
    'Patient Data::Right Progression HL':'HLprg_R',
    'Patient Data::Right Tinnitus':'Tinnitus_R',
    'Age at CI Evaluation':'Age',
     'L 1000 Hz':'hz1000_L',
     'L 125 Hz':'hz125_L',
     'L 2000 Hz':'hz2000_L',
     'L 250 Hz':'hz250_L',
     'L 3000 Hz':'hz3000_L',
     'L 4000 Hz':'hz4000_L',
     'L 500 Hz':'hz500_L',
     'L 6000 Hz':'hz6000_L',
     'L 8000 Hz':'hz8000_L',
 #    'L PTA (500, 1K, 2K, 4K)',
 #    'L PTA (500, 1K, 2K)',
     'L WRS%':'WRS_L',
 #   'Left Low Freq PTA (250, 500)',
 #   'Left Low Frequency PTA (125, 250, 500)',
     'R 1000 Hz':'hz1000_R',
     'R 125Hz':'hz125_R',
     'R 2000 Hz':'hz2000_R',
     'R 250 Hz':'hz250_R',
     'R 3000 Hz':'hz3000_R',
     'R 4000 Hz':'hz4000_R',
     'R 500 Hz':'hz500_R',
     'R 6000 Hz':'hz6000_R',
     'R 8000 Hz':'hz8000_R',
 #    'R PTA (500, 1K, 2K, 4K)',
  #   'R PTA (500, 1K, 2K)',
     'R WRS%':'WRS_R',
   #  'Right Low Freq PTA (250, 500)',
   #  'Right Low Frequency PTA (125, 250, 500)',
     'CNC Left':'CNC_L',
     'CNC Right':'CNC_R',
     'AzBio Left Ear':'AzBioQuiet_L',
     'AzBio Right Ear':'AzBioQuiet_R',
     'AzBio Binaural':'AzBioQuiet_bi',
     'AzBio + 10dB SNR - Binaural':'AzBio10SNR_bi',
     'AzBio + 10dB SNR - Left Ear':'AzBio10SNR_L',
     'AzBio + 10dB SNR - Right Ear':'AzBio10SNR_R',
     'AzBio +5dB SNR - Binaural':'AzBio5SNR_bi',
     'AzBio +5dB SNR - Left':'AzBio5SNR_L',
     'AzBio +5dB SNR - Right':'AzBio5SNR_R',   
     'SSD Candidate?':'SSDCandidate',
     'MOCA Score':'MOCA',
    }
    df = df.rename(columns=st_coldict)
    return df
        

In [46]:
def HAyrs(df):
    df.loc[df['Patient Data::Left Hearing Aid Use']=='No','HAyrs_L']=0
    df.loc[df['Patient Data::Right Hearing Aid Use']=='No','HAyrs_R']=0
    return df


In [47]:
msno.matrix(CI_candidacy_df_final,labels=True)
plt.show()

NameError: name 'CI_candidacy_df_final' is not defined

In [22]:
CI_candidacy_df_final.columns.values.tolist()

NameError: name 'CI_candidacy_df_final' is not defined

In [23]:
CI_candidacy_df_final['Etiology_L'].unique()

NameError: name 'CI_candidacy_df_final' is not defined