In [1]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt

# Turn off pandas' chained-assignment (SettingWithCopyWarning) check for the
# whole session.
# NOTE(review): this is a global switch, so it also hides the warning for any
# genuinely accidental write to a temporary copy elsewhere in the notebook.
pd.options.mode.chained_assignment = None

# Survey responses, read from a SAS transport (.XPT) file.
# The path uses a forward slash: the earlier 'Resources\LLCP2012.XPT' spelling
# depended on "\L" being an unrecognised escape sequence (which recent Python
# versions warn about) and the backslash separator only resolves on Windows.
riskdf = pd.read_sas('Resources/LLCP2012.XPT')

# Lookup table used later to translate the numeric state code into a state
# abbreviation (it is joined on its "fips" column).
fip_state = pd.read_csv('fips_to_state.csv')

riskdf.head()

Unnamed: 0,_STATE,_GEOSTR,_DENSTR2,PRECALL,REPNUM,REPDEPTH,FMONTH,IDATE,IMONTH,IDAY,...,_PNEUMO2,_RFSEAT2,_RFSEAT3,_RFMAM2Y,_MAM502Y,_RFPAP32,_RFPSA21,_RFBLDS2,_RFSIGM2,_AIDTST3
0,1.0,7.0,1.0,1.0,20118.0,3.0,2.0,b'02162012',b'02',b'16',...,,1.0,1.0,,,,1.0,1.0,2.0,2.0
1,1.0,8.0,1.0,1.0,10119.0,17.0,1.0,b'01052012',b'01',b'05',...,,1.0,1.0,,,,1.0,2.0,1.0,1.0
2,1.0,8.0,2.0,1.0,10126.0,5.0,1.0,b'01032012',b'01',b'03',...,1.0,1.0,1.0,1.0,1.0,,,9.0,1.0,2.0
3,1.0,8.0,1.0,1.0,10128.0,9.0,1.0,b'01192012',b'01',b'19',...,2.0,1.0,1.0,1.0,1.0,,,2.0,2.0,2.0
4,1.0,8.0,1.0,1.0,10130.0,26.0,1.0,b'01062012',b'01',b'06',...,,1.0,1.0,1.0,1.0,,,2.0,1.0,2.0


In [2]:
# Attach state information to every response:
#   1. expose the survey's _STATE code under the name "fips" so it lines up
#      with the key column of the FIPS lookup table,
#   2. join the lookup table on that key,
#   3. discard the "fips" key afterwards, since it is not needed further.
riskdf = (
    riskdf
    .rename(columns={"_STATE": "fips"})
    .merge(fip_state, on="fips")
    .drop(columns=["fips"])
)

In [3]:
# Build the working frame from just the columns needed for this extract.
# Note: the 2012 data stores race in RACE2, whereas the 2013-2016 files use a
# different column (_RACE).
# .copy() gives newdf its own data, so the column assignments made to it later
# are unambiguous writes to newdf rather than writes to a slice of riskdf.
newdf = riskdf[['state_abbr', 'IDATE', '_BMI5CAT', '_RFBMI5', 'RACE2']].copy()

newdf.head()

Unnamed: 0,state_abbr,IDATE,_BMI5CAT,_RFBMI5,RACE2
0,AL,b'02162012',3.0,2.0,1.0
1,AL,b'01052012',3.0,2.0,1.0
2,AL,b'01032012',4.0,2.0,2.0
3,AL,b'01192012',2.0,1.0,1.0
4,AL,b'01062012',4.0,2.0,1.0


In [4]:
# Translate the coded survey values into readable labels.
# Codebook reference: https://www.cdc.gov/brfss/annual_data/2016/pdf/codebook16_llcp.pdf
# NOTE(review): the link is the 2016 codebook but this is 2012 data using the
# RACE2 column - confirm the code -> label tables below against the 2012
# codebook.

# IDATE arrives as bytes (e.g. b'02162012'); decode it to plain text.
newdf["IDATE"] = newdf["IDATE"].str.decode("utf-8")

# BMI category. Missing answers are NaN in this column, so they are labelled
# with an explicit fillna() instead of a None key inside the replace mapping,
# which does not reliably match NaN in a float column.
bmi_category_labels = {
    1.0: "Underweight",
    2.0: "Normal Weight",
    3.0: "Overweight",
    4.0: "Obese",
}
newdf["_BMI5CAT"] = (
    newdf["_BMI5CAT"]
    .replace(bmi_category_labels)
    .fillna("Don't know/Refused/Missing")
)

# Overweight-or-obese flag.
# NOTE(review): the label for code 9 uses a typographic apostrophe (’), unlike
# the straight apostrophe in the other "Don't know" labels; it is left as-is
# because downstream consumers may match on the exact text.
bmi_over_25_labels = {
    1.0: "No",
    2.0: "Yes",
    9.0: "Don’t know/Refused/Missing",
}
newdf["_RFBMI5"] = newdf["_RFBMI5"].replace(bmi_over_25_labels)

# Race / ethnicity grouping.
race_labels = {
    1.0: "White, Non-Hispanic",
    2.0: "Black, Non-Hispanic",
    3.0: "Asian",
    4.0: "Native Hawaiian/Pacific Islander",
    5.0: "American Indian/Alaskan Native",
    6.0: "Other Race, Non-Hispanic",
    7.0: "Multiracial, Non-Hispanic",
    8.0: "Hispanic",
    9.0: "Don't Know/Not Sure/Refused",
}
newdf["RACE2"] = newdf["RACE2"].replace(race_labels)

# Give the columns human-readable names for the exported file.
newdf = newdf.rename(columns={
    "state_abbr": "State",
    "IDATE": "Date",
    "_BMI5CAT": "BMI Category",
    "_RFBMI5": "BMI Over 25",
    "RACE2": "Race/Ethnicity (General)",
})

newdf.head()

Unnamed: 0,State,Date,BMI Category,BMI Over 25,Race/Ethnicity (General)
0,AL,2162012,Overweight,Yes,"White, Non-Hispanic"
1,AL,1052012,Overweight,Yes,"White, Non-Hispanic"
2,AL,1032012,Obese,Yes,"Black, Non-Hispanic"
3,AL,1192012,Normal Weight,No,"White, Non-Hispanic"
4,AL,1062012,Obese,Yes,"White, Non-Hispanic"


In [5]:
# Write the labelled extract to disk; index=False keeps the DataFrame's row
# index out of the file.
newdf.to_csv(path_or_buf="ObesityEthnicity2012.csv", index=False)