In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import plotly.express as px
import plotly.io as pio

from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the cleaned Atlas table. NOTE: unpickling can execute arbitrary code,
# so only read pickle files that come from a trusted source.
atlas_pickle_path = r'./../data/AtlasCleaned.pkl'
df = pd.read_pickle(atlas_pickle_path)

# (rows, columns) of the loaded frame.
df.shape

(3142, 100)

In [3]:
# LACCESS_* / PCH_LACCESS_* columns removed from the working frame; the
# PCT_LACCESS_* columns stay and are selected by later cells.
laccess_cols_to_drop = [
    'LACCESS_POP10', 'LACCESS_POP15', 'PCH_LACCESS_POP_10_15',
    'LACCESS_LOWI10', 'LACCESS_LOWI15', 'PCH_LACCESS_LOWI_10_15',
    'LACCESS_HHNV10', 'LACCESS_HHNV15', 'PCH_LACCESS_HHNV_10_15',
    'LACCESS_SNAP15',
    'LACCESS_CHILD10', 'LACCESS_CHILD15', 'LACCESS_CHILD_10_15',
    'LACCESS_SENIORS10', 'LACCESS_SENIORS15', 'PCH_LACCESS_SENIORS_10_15',
    'LACCESS_WHITE15', 'LACCESS_BLACK15', 'LACCESS_HISP15',
    'LACCESS_NHASIAN15', 'LACCESS_NHNA15', 'LACCESS_NHPI15',
    'LACCESS_MULTIR15',
]

# Mutates `df` in place. Running this cell a second time raises KeyError
# because the columns are already gone.
df.drop(columns=laccess_cols_to_drop, inplace=True)

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Columns that make up the first ("df10") selection.
df10_cols = [
    'PCT_LACCESS_POP10', 'PCT_LACCESS_LOWI10', 'PCT_LACCESS_HHNV10',
    'PCT_LACCESS_CHILD10', 'PCT_LACCESS_SENIORS10',
    'PCT_FREE_LUNCH10', 'PCT_REDUCED_LUNCH10',
    'PCT_NHWHITE10', 'PCT_NHBLACK10', 'PCT_HISP10', 'PCT_NHASIAN10',
    'PCT_NHNA10', 'PCT_NHPI10', 'PCT_65OLDER10', 'PCT_18YOUNGER10',
    'GROCPTH11', 'SUPERCPTH11', 'CONVSPTH11', 'SPECSPTH11',
    'SNAPSPTH12', 'FFRPTH11', 'FSRPTH11',
    'PC_FFRSALES07', 'PC_FSRSALES07',
    'PCT_SNAP12', 'SNAP_PART_RATE11', 'PCT_NSLP12', 'PCT_SBP12', 'PCT_SFSP12',
    'FDPIR12', 'FOODINSEC_12_14', 'VLFOODSEC_12_14',
    'DIRSALES_FARMS07', 'FMRKTPTH13',
]

# Columns that make up the second ("df15") selection.
df15_cols = [
    'PCT_LACCESS_POP15', 'PCT_LACCESS_LOWI15', 'PCT_LACCESS_HHNV15',
    'PCT_LACCESS_SNAP15', 'PCT_LACCESS_CHILD15', 'PCT_LACCESS_SENIORS15',
    'PCT_LACCESS_WHITE15', 'PCT_LACCESS_BLACK15', 'PCT_LACCESS_HISP15',
    'PCT_LACCESS_NHASIAN15', 'PCT_LACCESS_NHNA15', 'PCT_LACCESS_NHPI15',
    'PCT_LACCESS_MULTIR15',
    'PCT_FREE_LUNCH15', 'PCT_REDUCED_LUNCH15',
    'GROCPTH16', 'SUPERCPTH16', 'CONVSPTH16', 'SPECSPTH16',
    'SNAPSPTH17', 'FFRPTH16', 'FSRPTH16',
    'PC_FFRSALES12', 'PC_FSRSALES12',
    'PCT_SNAP17', 'SNAP_PART_RATE16', 'PCT_NSLP17', 'PCT_SBP17', 'PCT_SFSP17',
    'FDPIR15', 'FOODINSEC_15_17', 'VLFOODSEC_15_17',
    'DIRSALES_FARMS12', 'FMRKTPTH18',
]

# Pull each selection out of the full frame.
df10 = pd.DataFrame(df, columns=df10_cols)
df15 = pd.DataFrame(df, columns=df15_cols)

# Min-max scale every column. One scaler object is refit for each frame, so
# the two frames are scaled independently and `scaler` ends up fitted on df15.
# Rebuilding the DataFrame from the returned array resets the row index.
scaler = MinMaxScaler()
df10 = pd.DataFrame(scaler.fit_transform(df10), columns=df10.columns)
df15 = pd.DataFrame(scaler.fit_transform(df15), columns=df15.columns)

# Cut the last two characters off every column name so both frames share
# names such as 'PCT_LACCESS_POP'. Names ending in a year range keep a
# trailing underscore: 'FOODINSEC_12_14' becomes 'FOODINSEC_12_'.
df10.columns = [str(name)[:-2] for name in df10.columns]
df15.columns = [str(name)[:-2] for name in df15.columns]


In [13]:
# Columns that exist only in df10 and are not used as predictors.
df10_only_excluded = [
    'PCT_LACCESS_CHILD', 'PCT_LACCESS_SENIORS',
    'PCT_NHWHITE', 'PCT_NHBLACK', 'PCT_HISP', 'PCT_NHASIAN', 'PCT_NHNA', 'PCT_NHPI',
    'PCT_65OLDER', 'PCT_18YOUNGER',
    'VLFOODSEC_12_', 'FOODINSEC_12_',
]

# Columns that exist only in df15 and are not used as predictors.
df15_only_excluded = [
    'PCT_LACCESS_CHILD', 'PCT_LACCESS_SENIORS', 'PCT_LACCESS_SNAP',
    'PCT_LACCESS_WHITE', 'PCT_LACCESS_BLACK', 'PCT_LACCESS_HISP',
    'PCT_LACCESS_NHASIAN', 'PCT_LACCESS_NHNA', 'PCT_LACCESS_NHPI',
    'PCT_LACCESS_MULTIR',
    'VLFOODSEC_15_', 'FOODINSEC_15_',
]

# Columns removed from both frames; this includes the target itself.
shared_excluded = [
    'PCT_LACCESS_POP', 'PCT_LACCESS_LOWI', 'PCT_LACCESS_HHNV',
    'GROCPTH', 'SUPERCPTH', 'CONVSPTH', 'SPECSPTH', 'SNAPSPTH',
    'FFRPTH', 'FSRPTH', 'PC_FFRSALES', 'PC_FSRSALES',
    'DIRSALES_FARMS', 'FMRKTPTH',
]

# Target: PCT_LACCESS_POP. df10 supplies the training pair, df15 the test pair.
y_train = df10['PCT_LACCESS_POP']
y_test = df15['PCT_LACCESS_POP']

X_train = df10.drop(columns=df10_only_excluded + shared_excluded)
X_test = df15.drop(columns=df15_only_excluded + shared_excluded)

print(X_train.columns)
print(X_test.columns)

Index(['PCT_FREE_LUNCH', 'PCT_REDUCED_LUNCH', 'PCT_SNAP', 'SNAP_PART_RATE',
       'PCT_NSLP', 'PCT_SBP', 'PCT_SFSP', 'FDPIR'],
      dtype='object')
Index(['PCT_FREE_LUNCH', 'PCT_REDUCED_LUNCH', 'PCT_SNAP', 'SNAP_PART_RATE',
       'PCT_NSLP', 'PCT_SBP', 'PCT_SFSP', 'FDPIR'],
      dtype='object')


In [11]:
import statsmodels.api as sm

# Ordinary least squares fit of y_train on X_train with an explicit intercept
# column added by add_constant.
X = sm.add_constant(X_train)
model = sm.OLS(y_train, X).fit()

# A bare `model.summary()` in the middle of a cell is evaluated and thrown
# away (only a cell's last expression is displayed), so print it explicitly.
print(model.summary())
print('R-squared:', model.rsquared)
print('P-values:', model.pvalues)

R-squared: 0.08965234707232628
P-values: const                9.727677e-66
PCT_FREE_LUNCH       4.902891e-03
PCT_REDUCED_LUNCH    1.311683e-04
PCT_SNAP             3.704307e-04
SNAP_PART_RATE       1.057186e-03
PCT_NSLP             1.417373e-04
PCT_SBP              8.310241e-01
PCT_SFSP             2.926358e-01
FDPIR                1.774391e-03
dtype: float64


In [15]:
from sklearn.linear_model import LinearRegression

# Fit on the training pair; fit() returns the estimator itself.
model = LinearRegression().fit(X_train, y_train)

# Score of the fitted model on the separate (X_test, y_test) pair.
test_score = model.score(X_test, y_test)

print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)
print('Score:', test_score)

Coefficients: [-0.07537103  0.24127118 -0.14467745 -0.09583414  0.07173816  0.00805502
 -0.0924084   0.24787078]
Intercept: 0.32727746944119024
Score: 0.041247863206057


In [None]:
# Collapse the table to one row per 'State' value by averaging each numeric
# column. numeric_only=True keeps this working on pandas >= 2.0, where
# averaging a frame that still contains text columns raises TypeError instead
# of silently skipping them.
df_avg = df.groupby(['State']).mean(numeric_only=True)
df_avg.shape

In [None]:
# Columns that make up the first ("df10") selection.
df10_cols = [
    'PCT_LACCESS_POP10', 'PCT_LACCESS_LOWI10', 'PCT_LACCESS_HHNV10',
    'PCT_LACCESS_CHILD10', 'PCT_LACCESS_SENIORS10',
    'PCT_FREE_LUNCH10', 'PCT_REDUCED_LUNCH10',
    'PCT_NHWHITE10', 'PCT_NHBLACK10', 'PCT_HISP10', 'PCT_NHASIAN10',
    'PCT_NHNA10', 'PCT_NHPI10', 'PCT_65OLDER10', 'PCT_18YOUNGER10',
    'GROCPTH11', 'SUPERCPTH11', 'CONVSPTH11', 'SPECSPTH11',
    'SNAPSPTH12', 'FFRPTH11', 'FSRPTH11',
    'PC_FFRSALES07', 'PC_FSRSALES07',
    'PCT_SNAP12', 'SNAP_PART_RATE11', 'PCT_NSLP12', 'PCT_SBP12', 'PCT_SFSP12',
    'FDPIR12', 'FOODINSEC_12_14', 'VLFOODSEC_12_14',
    'DIRSALES_FARMS07', 'FMRKTPTH13',
]

# Columns that make up the second ("df15") selection.
df15_cols = [
    'PCT_LACCESS_POP15', 'PCT_LACCESS_LOWI15', 'PCT_LACCESS_HHNV15',
    'PCT_LACCESS_SNAP15', 'PCT_LACCESS_CHILD15', 'PCT_LACCESS_SENIORS15',
    'PCT_LACCESS_WHITE15', 'PCT_LACCESS_BLACK15', 'PCT_LACCESS_HISP15',
    'PCT_LACCESS_NHASIAN15', 'PCT_LACCESS_NHNA15', 'PCT_LACCESS_NHPI15',
    'PCT_LACCESS_MULTIR15',
    'PCT_FREE_LUNCH15', 'PCT_REDUCED_LUNCH15',
    'GROCPTH16', 'SUPERCPTH16', 'CONVSPTH16', 'SPECSPTH16',
    'SNAPSPTH17', 'FFRPTH16', 'FSRPTH16',
    'PC_FFRSALES12', 'PC_FSRSALES12',
    'PCT_SNAP17', 'SNAP_PART_RATE16', 'PCT_NSLP17', 'PCT_SBP17', 'PCT_SFSP17',
    'FDPIR15', 'FOODINSEC_15_17', 'VLFOODSEC_15_17',
    'DIRSALES_FARMS12', 'FMRKTPTH18',
]

# Author's note: Foodhub, metro 13, medhincome 15 and 'POVRATE15' are not
# part of either list.

# Same two selections as before, taken from the per-state averages this time.
df10 = pd.DataFrame(df_avg, columns=df10_cols)
df15 = pd.DataFrame(df_avg, columns=df15_cols)

In [None]:
# Sanity check: (rows, columns) of the df10 selection built from df_avg.
df10.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Always start from the untouched state averages so this cell is safe to
# re-run. Feeding `df10`/`df15` back into themselves would strip two more
# characters from every column name on each additional run.
df10_raw = pd.DataFrame(df_avg, columns=df10_cols)
df15_raw = pd.DataFrame(df_avg, columns=df15_cols)

# Min-max scale every column. The scaler is refit for each frame, so the two
# frames are scaled independently. Rebuilding the DataFrame from the returned
# array resets the row index to 0..n-1.
scaler = MinMaxScaler()
df10 = pd.DataFrame(scaler.fit_transform(df10_raw), columns=df10_raw.columns)
df15 = pd.DataFrame(scaler.fit_transform(df15_raw), columns=df15_raw.columns)

# Cut the last two characters off every column name so both frames share
# names; 'FOODINSEC_12_14' becomes 'FOODINSEC_12_' (trailing underscore kept).
df10.rename(columns=lambda x: str(x)[:-2], inplace=True)
df15.rename(columns=lambda x: str(x)[:-2], inplace=True)

print(df10.columns)
print(df15.columns)

In [None]:
# Columns left out of the df10 predictors; the target 'FOODINSEC_12_' is one of them.
df10_excluded = [
    'PCT_LACCESS_CHILD', 'PCT_LACCESS_SENIORS',
    'PCT_NHWHITE', 'PCT_NHBLACK', 'PCT_HISP', 'PCT_NHASIAN', 'PCT_NHNA', 'PCT_NHPI',
    'PCT_65OLDER', 'PCT_18YOUNGER',
    'VLFOODSEC_12_', 'FOODINSEC_12_',
]

# Columns left out of the df15 predictors; the target 'FOODINSEC_15_' is one of them.
df15_excluded = [
    'PCT_LACCESS_CHILD', 'PCT_LACCESS_SENIORS', 'PCT_LACCESS_SNAP',
    'PCT_LACCESS_WHITE', 'PCT_LACCESS_BLACK', 'PCT_LACCESS_HISP',
    'PCT_LACCESS_NHASIAN', 'PCT_LACCESS_NHNA', 'PCT_LACCESS_NHPI',
    'PCT_LACCESS_MULTIR',
    'VLFOODSEC_15_', 'FOODINSEC_15_',
]

# df10 supplies the training pair, df15 the test pair.
X_train = df10.drop(columns=df10_excluded)
y_train = df10['FOODINSEC_12_']

X_test = df15.drop(columns=df15_excluded)
y_test = df15['FOODINSEC_15_']

print(X_train.columns)
print(X_test.columns)


In [None]:
# Predictors removed from both design matrices in this step. With the column
# lists used above, only PCT_LACCESS_POP, PCT_LACCESS_LOWI and
# PCT_LACCESS_HHNV remain afterwards.
non_access_cols = [
    'PCT_FREE_LUNCH', 'PCT_REDUCED_LUNCH',
    'GROCPTH', 'SUPERCPTH', 'CONVSPTH', 'SPECSPTH', 'SNAPSPTH',
    'FFRPTH', 'FSRPTH', 'PC_FFRSALES', 'PC_FSRSALES',
    'PCT_SNAP', 'SNAP_PART_RATE', 'PCT_NSLP', 'PCT_SBP', 'PCT_SFSP',
    'FDPIR', 'DIRSALES_FARMS', 'FMRKTPTH',
]

# Both frames are rebound from themselves, so running this cell twice in a
# row raises KeyError (the columns are already gone).
X_train = X_train.drop(columns=non_access_cols)
X_test = X_test.drop(columns=non_access_cols)

print(X_train.columns)
print(X_test.columns)


In [None]:
from sklearn.linear_model import LinearRegression

# Fit on the training pair; fit() returns the estimator itself.
model = LinearRegression().fit(X_train, y_train)

# NOTE(review): this score is computed on the training data, whereas the
# other LinearRegression cells in this notebook score on (X_test, y_test).
# Confirm that an in-sample score is what is wanted here.
train_score = model.score(X_train, y_train)

print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)
print('Score:', train_score)

In [None]:
import statsmodels.api as sm

# Ordinary least squares fit of y_train on X_train with an explicit intercept
# column added by add_constant.
X = sm.add_constant(X_train)
model = sm.OLS(y_train, X).fit()

# A bare `model.summary()` in the middle of a cell is evaluated and thrown
# away (only a cell's last expression is displayed), so print it explicitly.
print(model.summary())
print('R-squared:', model.rsquared)
print('P-values:', model.pvalues)

In [None]:
# Show which predictor columns are currently in the training design matrix.
X_train.columns

In [None]:
# Columns that exist only in df10 and are not used as predictors.
df10_only_excluded = [
    'PCT_LACCESS_CHILD', 'PCT_LACCESS_SENIORS',
    'PCT_NHWHITE', 'PCT_NHBLACK', 'PCT_HISP', 'PCT_NHASIAN', 'PCT_NHNA', 'PCT_NHPI',
    'PCT_65OLDER', 'PCT_18YOUNGER',
    'VLFOODSEC_12_', 'FOODINSEC_12_',
]

# Columns that exist only in df15 and are not used as predictors.
df15_only_excluded = [
    'PCT_LACCESS_CHILD', 'PCT_LACCESS_SENIORS', 'PCT_LACCESS_SNAP',
    'PCT_LACCESS_WHITE', 'PCT_LACCESS_BLACK', 'PCT_LACCESS_HISP',
    'PCT_LACCESS_NHASIAN', 'PCT_LACCESS_NHNA', 'PCT_LACCESS_NHPI',
    'PCT_LACCESS_MULTIR',
    'VLFOODSEC_15_', 'FOODINSEC_15_',
]

# Columns removed from both frames; this includes the target itself.
shared_excluded = [
    'PCT_LACCESS_POP', 'PCT_LACCESS_LOWI', 'PCT_LACCESS_HHNV',
    'GROCPTH', 'SUPERCPTH', 'CONVSPTH', 'SPECSPTH', 'SNAPSPTH',
    'FFRPTH', 'FSRPTH', 'PC_FFRSALES', 'PC_FSRSALES',
    'DIRSALES_FARMS', 'FMRKTPTH',
]

# Target: PCT_LACCESS_POP. df10 supplies the training pair, df15 the test pair.
y_train = df10['PCT_LACCESS_POP']
y_test = df15['PCT_LACCESS_POP']

X_train = df10.drop(columns=df10_only_excluded + shared_excluded)
X_test = df15.drop(columns=df15_only_excluded + shared_excluded)

print(X_train.columns)
print(X_test.columns)

In [None]:
# LinearRegression is imported by an earlier cell of this notebook.
# Fit on the training pair; fit() returns the estimator itself.
model = LinearRegression().fit(X_train, y_train)

# Score of the fitted model on the separate (X_test, y_test) pair.
test_score = model.score(X_test, y_test)

print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)
print('Score:', test_score)

In [None]:
sns.set(style='darkgrid')


def plt_dis(c, data=None):
    """Show a 20-bin histogram of column ``c``.

    Parameters
    ----------
    c : str
        Name of the column to plot.
    data : pandas.DataFrame, optional
        Frame that holds column ``c``. Defaults to the notebook-level
        ``df10``, which is what a one-argument call has always plotted.
    """
    if data is None:
        # Looked up at call time, so the current notebook-level df10 is used.
        data = df10
    # The FacetGrid returned by displot is not needed; plt.show() renders it.
    sns.displot(data=data, x=c, height=4, aspect=10/8.27, bins=20)
    plt.show()


plt_dis('FOODINSEC_12_')

In [None]:
# Two-letter state abbreviation -> two-character code string (leading zeros
# preserved), used below as the map location id.
states = {
    'WA': '53', 'DE': '10', 'DC': '11', 'WI': '55',
    'WV': '54', 'HI': '15', 'FL': '12', 'WY': '56',
    'PR': '72', 'NJ': '34', 'NM': '35', 'TX': '48',
    'LA': '22', 'NC': '37', 'ND': '38', 'NE': '31',
    'TN': '47', 'NY': '36', 'PA': '42', 'AK': '02',
    'NV': '32', 'NH': '33', 'VA': '51', 'CO': '08',
    'CA': '06', 'AL': '01', 'AR': '05', 'VT': '50',
    'IL': '17', 'GA': '13', 'IN': '18', 'IA': '19',
    'MA': '25', 'AZ': '04', 'ID': '16', 'CT': '09',
    'ME': '23', 'MD': '24', 'OK': '40', 'OH': '39',
    'UT': '49', 'MO': '29', 'MN': '27', 'MI': '26',
    'RI': '44', 'KS': '20', 'MT': '30', 'MS': '28',
    'SC': '45', 'KY': '21', 'OR': '41', 'SD': '46',
}

# df10 lost its State index when it was rebuilt after scaling, so the labels
# are re-attached by position from the groupby result.
df10['State'] = df_avg.index

# Swap each abbreviation for its code; values missing from `states` are left
# unchanged.
df10['State'] = df10['State'].replace(states)

In [None]:
import json

# GeoJSON is UTF-8 encoded; state the encoding explicitly so loading does not
# depend on the platform's default text encoding.
with open('USA_Counties_(Generalized).geojson', encoding='utf-8') as f:
    tract = json.load(f)

# NOTE(review): the file name suggests county-level features, while rows are
# matched on the state-level "properties.STATE_FIPS" key. Confirm that each
# state id resolves to the intended geometry.
fig = px.choropleth(
    df10,
    locations='State',
    color='FOODINSEC_12_',
    color_continuous_scale="RdBu_r",
    range_color=(0, 1),  # the colour column was min-max scaled upstream
    geojson=tract,
    featureidkey="properties.STATE_FIPS",
    scope="usa",
    # Replaces the placeholder title 'ff' so the figure can stand on its own.
    title='Min-max scaled FOODINSEC_12_14 by state',
)
fig.update_layout(legend=dict(orientation="h",yanchor="top",y=1.02,xanchor="right",x=1))
fig.show()

In [None]:
# Displaying the whole GeoJSON dict floods the notebook with coordinate data.
# Show only the properties of the first feature, which is enough to look up
# the keys available for `featureidkey`.
tract['features'][0]['properties']

In [None]:
# Display the scaled frame, which now also carries the recoded 'State' column.
df10