In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
#import geopandas as gpd

In [None]:
# Load the three raw county-level inputs in one pass. header=1 makes pandas
# take the column names from the second line of each file.
tn_df, tn_pop, tn_2011 = (
    pd.read_csv(csv_path, header=1)
    for csv_path in (
        '../data/tn_county_data.csv',
        '../data/tn_pop.csv',
        '../data/tn_obesity_2011.csv',
    )
)
#tn_counties = gpd.read_file('../data/county/tncounty.shp')

In [None]:
# reset_index returns a new frame, so the result must be assigned for the
# call to have any effect. Preview a few rows instead of rendering the
# entire table.
tn_df = tn_df.reset_index(drop=True)
tn_df.head()

In [None]:
# Keep only the identifying columns and the adult obesity percentage
# from the main county table, then look at the last ten rows.
tn_county_obesity = tn_df[
    ['State', 'County', '% Adults with Obesity']
]
tn_county_obesity.tail(n=10)

In [None]:
# Quick look at the first five rows of the population table.
tn_pop.head(n=5)

In [None]:
# Narrow tn_pop to the State/County keys plus the two measures used
# later in the notebook: population and median household income.
tn_pop = tn_pop[
    ['State', 'County', 'Population', 'Median Household Income']
]

In [None]:
# The 2011 table is only needed to compare 2011 obesity rates with the 2022
# ones, so keep just the State/County keys and the '% Obese' column.
tn_2011 = tn_2011[
    ['State', 'County', '% Obese']
]

In [None]:
# Give the 2011 rate an unambiguous name so it cannot be confused with the
# 2022 obesity column once the tables are merged.
tn_2011 = tn_2011.rename({'% Obese': 'obesity_2011'}, axis='columns')

In [None]:
# Drop the row that has no county name.
# The previous positional slice (iloc[1:96]) relied on magic row numbers and
# removed one more real county every time the cell was re-run; filtering on
# the missing value is explicit and safe to re-run.
# NOTE(review): assumes the only row with a missing 'County' is the leading
# statewide summary row that the old slice skipped - confirm against the CSV.
tn_2011 = tn_2011.dropna(subset=['County'])

In [None]:
# reset_index returns a new frame, so assign it back; otherwise the index
# left over from the row filtering is kept. Show a short preview rather
# than the whole table.
tn_2011 = tn_2011.reset_index(drop=True)
tn_2011.head()

In [None]:
# Combine the most recent obesity rates with the population/income table.
# Inner join on the county name; both inputs carry a 'State' column, so the
# result contains 'State_x' and 'State_y'.
tn_county = tn_county_obesity.merge(tn_pop, on='County', how='inner')

In [None]:
# 'State_y' is the second copy of the state column created by the merge;
# remove it.
tn_county = tn_county.drop('State_y', axis='columns')

In [None]:
# Estimated head-count of people with obesity per county: population times
# the obesity percentage, divided by 100 and rounded to zero decimals.
# NOTE(review): 'Population' looks like total population while the rate is
# for adults - confirm that combining the two is intended.
tn_county['pop_with_obesity'] = (
    tn_county['Population']
    .mul(tn_county['% Adults with Obesity'])
    .div(100)
    .round(0)
)

In [None]:
# Drop the row that has no county name.
# The previous positional slice (iloc[1:96]) relied on magic row numbers and
# removed one more real county every time the cell was re-run; filtering on
# the missing value is explicit and safe to re-run.
# NOTE(review): assumes the only row with a missing 'County' is the leading
# statewide summary row that the old slice skipped - confirm against the data.
tn_county = tn_county.dropna(subset=['County'])

In [None]:
# Switch the remaining source column names to short snake_case names.
tn_county = tn_county.rename(
    columns={
        '% Adults with Obesity': 'obesity_perc_2022',
        'State_x': 'state',
        'Median Household Income': 'median_income',
    }
)

In [None]:

# Horizontal bar chart of the estimated number of people with obesity in each
# county. A title and a readable x-axis label are set so the figure can be
# understood on its own when the notebook is skimmed.
fig = px.bar(
    tn_county,
    x='pop_with_obesity',
    y='County',
    orientation='h',
    title='Estimated Population with Obesity by TN County',
    labels={'pop_with_obesity': 'Estimated population with obesity'},
)
fig.show()

In [None]:


# Scatter plot: 2022 obesity percentage (x) against county population (y).
fig = px.scatter(
    tn_county,
    x='obesity_perc_2022',
    y='Population',
    title='TN Pop and Obesity Percentage',
)
fig.show()

In [None]:

# Scatter plot: 2022 obesity percentage (x) against median household income (y).
fg = px.scatter(
    tn_county,
    x='obesity_perc_2022',
    y='median_income',
    title='Median Household Income and Obesity Percentage',
)
fg.show()

In [None]:
# Preview the first five rows of the combined county table.
tn_county.head(n=5)

In [None]:
# Read in the fast-food, grocery-store and farm tables prepared in the
# food_access notebook.
tn_county_ff, tn_grocery, tn_farms = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/tn_ff.csv',
        '../data/tn_grocery.csv',
        '../data/tn_farms.csv',
    )
)

In [None]:
# Fast food per capita for all Tennessee counties: keep only the rows whose
# 'ff_year' code is 'FFRPTH16'.
# NOTE(review): presumably the 2016 fast-food-restaurants-per-1,000-residents
# measure - confirm against the source data dictionary.
ff_percap = tn_county_ff[tn_county_ff['ff_year'].eq('FFRPTH16')]
ff_percap.head(n=5)

In [None]:
# Attach the fast-food rows to the county table (inner join on county name).
tn_county = tn_county.merge(ff_percap, on='County', how='inner')

In [None]:
# Keep an explicit list of columns, which discards the duplicate columns
# brought in by the fast-food merge.
tn_county = tn_county[
    [
        'state',
        'County',
        'obesity_perc_2022',
        'Population',
        'median_income',
        'pop_with_obesity',
        'ff_year',
        'count',
    ]
]

In [None]:
# Give the fast-food value column a descriptive name.
tn_county = tn_county.rename({'count': 'ff_per1k'}, axis='columns')

In [None]:
# Total the grocery 'Value' column per county and name the result
# 'grocery_store_count'. (Duplicated code: the same aggregation was revisited
# in the food_access notebook.)
tn_grocery = (
    tn_grocery
    .groupby(['County'], as_index=False)['Value']
    .sum()
    .rename(columns={'Value': 'grocery_store_count'})
)

In [None]:
# Attach the per-county grocery store totals (inner join on county name).
tn_county = tn_county.merge(tn_grocery, on='County', how='inner')
# Preview the first rows only; rendering the entire frame floods the output.
tn_county.head()

In [None]:
# This cell used to rename 'Variable_Code' and 'Value', but neither column is
# present in tn_county: the grocery table is reduced to 'County' plus
# 'grocery_store_count' before it is merged, so the rename silently did
# nothing. Check for the column that is actually there instead.
assert 'grocery_store_count' in tn_county.columns, (
    "expected 'grocery_store_count' from the grocery merge"
)

In [None]:
# Total the farm 'Value' column per county and name the result 'farm_count'.
# NOTE(review): per the original note the rows cover berry farms, farmers'
# markets, orchards and vegetable farms - confirm against tn_farms.csv.
tn_farm_count = (
    tn_farms
    .groupby('County', as_index=False)['Value']
    .sum()
    .rename(columns={'Value': 'farm_count'})
)

In [None]:
# Attach the per-county farm totals to the rest of the Tennessee table
# (inner join on county name).
tn_county = tn_county.merge(tn_farm_count, on='County', how='inner')

In [None]:
# Show just the first row to check the columns after the merges.
tn_county.head(n=1)

In [None]:
# Bring the 2011 obesity rates into the county table (inner join on county
# name) so they can be compared with the 2022 rates.
tn_county = tn_county.merge(tn_2011, on='County', how='inner')

In [None]:
# Remove the duplicate 'State' column that came in with the 2011 table.
# The labels are passed as a list; the original used a set literal, which
# only worked incidentally and reads like a typo for a dict.
tn_county = tn_county.drop(columns=['State'])

In [None]:
# Store the 2011 obesity rate and the obesity head-count as whole numbers.
# The rate is rounded before the cast because astype(int) on its own truncates
# toward zero (31.9 would become 31). pd.to_numeric keeps the cast working if
# the column was read in as numeric strings.
# Bracket assignment replaces attribute-style assignment, which only updates a
# column when the name already exists and is easy to misread.
tn_county['obesity_2011'] = (
    pd.to_numeric(tn_county['obesity_2011']).round().astype(int)
)
# pop_with_obesity was rounded to zero decimals when it was created, so a
# plain integer cast is enough here.
tn_county['pop_with_obesity'] = tn_county['pop_with_obesity'].astype(int)

In [None]:
# tn_county['Population']/tn_county['farm_count']

In [None]:
# Export the combined county table for the dashboard, without the index column.
tn_county.to_csv(
    '../data/dashboard_csv/tenn_county_food_environment.csv',
    index=False,
)

In [None]:
# Grouped bar chart comparing each county's 2011 and 2022 obesity percentages.
# plotly.graph_objects (`go`) is imported with the other imports at the top of
# the notebook rather than in this cell.
fig = go.Figure()

# One trace per year; barmode='group' below places them side by side.
fig.add_trace(go.Bar(
    x=tn_county['County'],
    y=tn_county['obesity_2011'],
    name='2011 Obesity Percentage',
    marker_color='rgb(55,83,109)',
))
fig.add_trace(go.Bar(
    x=tn_county['County'],
    y=tn_county['obesity_perc_2022'],
    name='2022 Obesity Percentage',
    marker_color='rgb(26,118,255)',
))

# Title and axis titles are set so the figure is readable on its own.
fig.update_layout(
    title='Obesity Percentage by TN County: 2011 vs 2022',
    xaxis_title='County',
    yaxis_title='Obesity (%)',
    barmode='group',
    bargap=0.25,
    bargroupgap=0.1,
    xaxis_tickangle=90,
)
fig.show()

In [None]:
#tn_counties.head()

In [None]:
#tn_counties = pd.merge(left = tn_counties, right = tn_county, left_on = 'NAME', right_on = 'County')

In [None]:
#type(tn_counties)

In [None]:
# fig, ax = plt.subplots(figsize = (16,4))
# tn_counties.plot(column = 'obesity_perc',
#                 cmap = 'OrRd',
#                 edgecolor = 'black',
#                 legend = True,
#                 ax = ax)
# plt.title('Obesity Rate by County, 2021', fontsize = 14)
# ax.axis('off');

###### What are the obesity rates for places with greater access to fast food?
- Add indexes for counties (count of fast food restaurants per capita, farms, and grocery stores)
- Use Tableau for the rest of the visualizations
###### What are the trends for urban vs. rural food deserts? (For rural areas, a food desert is defined as having no supermarket within 10 miles.)
###### Does obesity vary by income?
###### How does obesity vary by region in the United States? 

###### In this capstone project, I will use Python to analyze different factors that could potentially affect obesity rates in Tennessee counties and other regions of the U.S. My goal is to create multiple visualizations, including an interactive map and a dashboard, to tell the story of the data. With this project, I hope to identify rural and urban food desert trends by comparing specific regions to one another. I will also look at median household incomes to see whether they correlate with other factors in the analysis. (Is there a higher median household income in cities with lower obesity rates?) Finally, I would like to analyze food access (fast-food restaurants per 1,000 people, grocery stores, farmers' markets, etc.) for Tennessee counties. My stretch goal is to see whether walkable cities have a significant impact on obesity.