In [1]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt

# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

# Lookup table that is merged onto the survey data via its "fips" column.
fip_state = pd.read_csv('fips_to_state.csv')

# Forward slash instead of a backslash: '\L' is an invalid escape sequence in a
# normal string literal, and '/' is accepted as a path separator on Windows as
# well as on POSIX systems.
brfss_df = pd.read_sas('Resources/LLCP2012.XPT')

brfss_df.head()

Unnamed: 0,_STATE,_GEOSTR,_DENSTR2,PRECALL,REPNUM,REPDEPTH,FMONTH,IDATE,IMONTH,IDAY,...,_PNEUMO2,_RFSEAT2,_RFSEAT3,_RFMAM2Y,_MAM502Y,_RFPAP32,_RFPSA21,_RFBLDS2,_RFSIGM2,_AIDTST3
0,1.0,7.0,1.0,1.0,20118.0,3.0,2.0,b'02162012',b'02',b'16',...,,1.0,1.0,,,,1.0,1.0,2.0,2.0
1,1.0,8.0,1.0,1.0,10119.0,17.0,1.0,b'01052012',b'01',b'05',...,,1.0,1.0,,,,1.0,2.0,1.0,1.0
2,1.0,8.0,2.0,1.0,10126.0,5.0,1.0,b'01032012',b'01',b'03',...,1.0,1.0,1.0,1.0,1.0,,,9.0,1.0,2.0
3,1.0,8.0,1.0,1.0,10128.0,9.0,1.0,b'01192012',b'01',b'19',...,2.0,1.0,1.0,1.0,1.0,,,2.0,2.0,2.0
4,1.0,8.0,1.0,1.0,10130.0,26.0,1.0,b'01062012',b'01',b'06',...,,1.0,1.0,1.0,1.0,,,2.0,1.0,2.0


In [2]:
# Rename _STATE to "fips" so the survey frame can be merged with the FIPS
# lookup table, then drop the "fips" key because it is not needed afterwards.
brfss_df = (
    brfss_df
    .rename(columns={"_STATE": "fips"})
    .merge(fip_state, on="fips")
    .drop(columns=["fips"])
)

In [3]:
# Create a new dataframe with only the columns we want.
# Note that the 2012 data uses RACE2, a different column from the _RACE used in 2013-2016.
# .copy() makes new_df an independent frame, so the column assignments in the
# following cell do not depend on the chained-assignment warning being silenced.
new_df = brfss_df[['state_abbr', 'IDATE' , '_BMI5CAT', '_RFBMI5', 'RACE2', '_EDUCAG', 'INCOME2', '_AGE_G', 'SEX']].copy()

new_df.head()

Unnamed: 0,state_abbr,IDATE,_BMI5CAT,_RFBMI5,RACE2,_EDUCAG,INCOME2,_AGE_G,SEX
0,AL,b'02162012',3.0,2.0,1.0,3.0,6.0,5.0,1.0
1,AL,b'01052012',3.0,2.0,1.0,4.0,8.0,4.0,1.0
2,AL,b'01032012',4.0,2.0,2.0,4.0,99.0,6.0,2.0
3,AL,b'01192012',2.0,1.0,1.0,1.0,3.0,6.0,2.0
4,AL,b'01062012',4.0,2.0,1.0,2.0,99.0,5.0,2.0


In [4]:
# Translate the numeric survey codes into human-readable labels.
# Reference: https://www.cdc.gov/brfss/annual_data/2016/pdf/codebook16_llcp.pdf
# NOTE(review): the labels follow the 2016 codebook while the export is named
# for 2012 -- confirm the codes against the 2012 codebook.
# NOTE(review): some labels use a typographic apostrophe (’) and others a
# straight one ('); they are kept exactly as they were because the text ends up
# in the exported CSV.

# One code -> label mapping per coded column.
# A None key is intended to catch missing values -- TODO confirm this with the
# installed pandas version.
value_labels = {
    "_BMI5CAT": {
        1.0: "Underweight",
        2.0: "Normal Weight",
        3.0: "Overweight",
        4.0: "Obese",
        None: "Don't know/Refused/Missing",
    },
    "_RFBMI5": {
        1.0: "No",
        2.0: "Yes",
        9.0: "Don’t know/Refused/Missing",
    },
    "RACE2": {
        1.0: "White, Non-Hispanic",
        2.0: "Black, Non-Hispanic",
        3.0: "Asian",
        4.0: "Native Hawaiian/Pacific Islander",
        5.0: "American Indian/Alaskan Native",
        6.0: "Other Race, Non-Hispanic",
        7.0: "Multiracial, Non-Hispanic",
        8.0: "Hispanic",
        9.0: "Don't Know/Not Sure/Refused",
    },
    "_EDUCAG": {
        1.0: "Did not graduate High School",
        2.0: "Graduated High School",
        3.0: "Attended College or Technical School",
        4.0: "Graduated from College or Technical School",
        9.0: "Don’t know/Not sure/Missing",
    },
    # The original mapping listed the 8.0 entry twice; it appears once here.
    "INCOME2": {
        1.0: "Less than $10,000",
        2.0: "$10,000 to less than $15,000",
        3.0: "$15,000 to less than $20,000",
        4.0: "$20,000 to less than $25,000",
        5.0: "$25,000 to less than $35,000",
        6.0: "$35,000 to less than $50,000",
        7.0: "$50,000 to less than $75,000",
        8.0: "$75,000 or more",
        77.0: "Don’t know/Not sure",
        99.0: "Refused",
        None: "Not asked or Missing",
    },
    "_AGE_G": {
        1.0: "Age 18 to 24",
        2.0: "Age 25 to 34",
        3.0: "Age 35 to 44",
        4.0: "Age 45 to 54",
        5.0: "Age 55 to 64",
        6.0: "Age 65 or older",
    },
    "SEX": {
        1.0: "Male",
        2.0: "Female",
        9.0: "Refused",
    },
}

# Raw column name -> name used in the exported file.
# The dict order is also the final column order.
output_names = {
    "state_abbr": "State",
    "IDATE": "Date",
    "_BMI5CAT": "BMI Category",
    "_RFBMI5": "BMI Over 25",
    "RACE2": "Race/Ethnicity",
    "_EDUCAG": "Education Level",
    "INCOME2": "Income Range",
    "_AGE_G": "Age Group",
    "SEX": "Sex",
}

# Work on an explicit copy so the assignments below never write into a frame
# that was derived from a selection of another frame.
labelled_df = new_df.copy()

# IDATE holds byte strings (e.g. b'02162012'); decode them to text.
labelled_df["IDATE"] = labelled_df["IDATE"].str.decode("utf-8")

for column, labels in value_labels.items():
    labelled_df[column] = labelled_df[column].replace(labels)

# Rename to the presentation names and pin the column order.
new_df = labelled_df.rename(columns=output_names)[list(output_names.values())]

new_df.head()

Unnamed: 0,State,Date,BMI Category,BMI Over 25,Race/Ethnicity,Education Level,Income Range,Age Group,Sex
0,AL,2162012,Overweight,Yes,"White, Non-Hispanic",Attended College or Technical School,"$35,000 to less than $50,000",Age 55 to 64,Male
1,AL,1052012,Overweight,Yes,"White, Non-Hispanic",Graduated from College or Technical School,"$75,000 or more",Age 45 to 54,Male
2,AL,1032012,Obese,Yes,"Black, Non-Hispanic",Graduated from College or Technical School,Refused,Age 65 or older,Female
3,AL,1192012,Normal Weight,No,"White, Non-Hispanic",Did not graduate High School,"$15,000 to less than $20,000",Age 65 or older,Female
4,AL,1062012,Obese,Yes,"White, Non-Hispanic",Graduated High School,Refused,Age 55 to 64,Female


In [5]:
# Write the cleaned frame to disk (without the index column) and report completion.
output_csv = "FilteredObesity2012.csv"
new_df.to_csv(output_csv, index=False)
print("Cleaning Complete!")

Cleaning Complete!
