In [1]:
# dependencies
import pandas as pd
from census import Census
from us import states

# census API key
from config import api_key

### 2011 Census Call

In [2]:
mhi = 'B19013_001E'
mhv = 'B25077_001E'
wbd = 'B99151_001E'

c = Census(api_key, year=2011)

In [3]:
income_df = pd.DataFrame(c.acs5.zipcode(mhi, Census.ALL))
housing_df = pd.DataFrame(c.acs5.zipcode(mhv, Census.ALL))
bachelors_df = pd.DataFrame(c.acs5.zipcode(wbd, Census.ALL))
total_df = pd.DataFrame(c.acs5.zipcode('B01003_001E', Census.ALL))

df11 = income_df.merge(housing_df, on='zip code tabulation area')
df11 = df11.merge(bachelors_df, on='zip code tabulation area')
df11 = df11.merge(total_df, on='zip code tabulation area')

df11 = df11.rename(columns={'B19013_001E': 'Median Household Income', 'zip code tabulation area': 'Zip', 'B25077_001E': 'Median Home Value', 'B01003_001E': 'Total Population', 'B99151_001E': 'Have Bachelors+'})

df11['% Bachelors+'] = round(df11['Have Bachelors+']/df11['Total Population'] * 100, 2)

In [4]:
df11

Unnamed: 0,Median Household Income,Zip,Median Home Value,Have Bachelors+,Total Population,% Bachelors+
0,13318.0,00601,103200.0,11724.0,18533.0,63.26
1,14947.0,00602,89300.0,27432.0,41930.0,65.42
2,14437.0,00603,116700.0,36436.0,54475.0,66.89
3,11155.0,00606,101000.0,4038.0,6386.0,63.23
4,16367.0,00610,109400.0,19302.0,29111.0,66.30
...,...,...,...,...,...,...
33115,27500.0,99923,108300.0,77.0,116.0,66.38
33116,43125.0,99925,164400.0,525.0,689.0,76.20
33117,45216.0,99926,101100.0,919.0,1488.0,61.76
33118,17639.0,99927,123800.0,67.0,67.0,100.00


In [5]:
print(df11['Median Household Income'].isna().sum())
print(df11['Median Home Value'].isna().sum())
print(df11['Have Bachelors+'].isna().sum())

0
0
0


In [6]:
print(df11.loc[df11['Median Household Income'] <= 0])
print(df11.loc[df11['Median Home Value'] <= 0])
print(df11.loc[df11['Have Bachelors+'] <= 0])

       Median Household Income    Zip  Median Home Value  Have Bachelors+  \
26                -666666666.0  12862       -666666666.0              0.0   
64                -666666666.0  12933       -666666666.0              0.0   
98                -666666666.0  12977       -666666666.0           1638.0   
115               -666666666.0  13024       -666666666.0           1562.0   
187               -666666666.0  13138       -666666666.0             30.0   
...                        ...    ...                ...              ...   
33025             -666666666.0  99706       -666666666.0              0.0   
33038             -666666666.0  99732       -666666666.0              0.0   
33040             -666666666.0  99734       -666666666.0            206.0   
33062             -666666666.0  99757       -666666666.0              0.0   
33110             -666666666.0  99903             9999.0             16.0   

       Total Population  % Bachelors+  
26                  0.0           N

### 2018 Census Call

In [7]:
c = Census(api_key, year=2018)

In [8]:
income_df = pd.DataFrame(c.acs5.zipcode(mhi, Census.ALL))
housing_df = pd.DataFrame(c.acs5.zipcode(mhv, Census.ALL))
bachelors_df = pd.DataFrame(c.acs5.zipcode(wbd, Census.ALL))
total_df = pd.DataFrame(c.acs5.zipcode('B01003_001E', Census.ALL))

df18 = income_df.merge(housing_df, on='zip code tabulation area')
df18 = df18.merge(bachelors_df, on='zip code tabulation area')
df18 = df18.merge(total_df, on='zip code tabulation area')

df18 = df18.rename(columns={'B19013_001E': 'Median Household Income', 'zip code tabulation area': 'Zip', 'B25077_001E': 'Median Home Value', 'B01003_001E': 'Total Population', 'B99151_001E': 'Have Bachelors+'})

df18['% Bachelors+'] = round(df11['Have Bachelors+']/df11['Total Population'] * 100, 2)

In [9]:
print(df18['Median Household Income'].isna().sum())
print(df18['Median Home Value'].isna().sum())
print(df18['Have Bachelors+'].isna().sum())

35
0
0


In [68]:
df18.loc[df18['Median Household Income'] <= 0]['Median Household Income'].unique()

array([-6.66666666e+08])

In [10]:
print(df18.loc[df18['Median Household Income'] <= 0])
print(df18.loc[df18['Median Home Value'] <= 0])
print(df18.loc[df18['Have Bachelors+'] <= 0])

       Median Household Income    Zip  Median Home Value  Have Bachelors+  \
42                -666666666.0  00694       -666666666.0             69.0   
86                -666666666.0  00786            95700.0            348.0   
107               -666666666.0  00934       -666666666.0             60.0   
108               -666666666.0  00936       -666666666.0            772.0   
110               -666666666.0  00950       -666666666.0              0.0   
...                        ...    ...                ...              ...   
33044             -666666666.0  99774       -666666666.0              7.0   
33058             -666666666.0  99790       -666666666.0             10.0   
33075             -666666666.0  99903       -666666666.0              0.0   
33080             -666666666.0  99923       -666666666.0             15.0   
33083             -666666666.0  99927           272500.0             38.0   

       Total Population  % Bachelors+  
42                 69.0         62.

### Starbucks Data

In [11]:
sb_data = pd.read_csv('data_exploration/data/directory.csv')

In [12]:
sb_data.loc[sb_data['Country'] == 'US']['Ownership Type'].value_counts()

Company Owned    8226
Licensed         5382
Name: Ownership Type, dtype: int64

In [13]:
sb_data.loc[sb_data['Country'] == 'US']

Unnamed: 0,Brand,Store Number,Store Name,Ownership Type,Street Address,City,State/Province,Country,Postcode,Phone Number,Timezone,Longitude,Latitude
11964,Starbucks,3513-125945,Safeway-Anchorage #1809,Licensed,5600 Debarr Rd Ste 9,Anchorage,AK,US,995042300,907-339-0900,GMT-09:00 America/Anchorage,-149.78,61.21
11965,Starbucks,74352-84449,Safeway-Anchorage #2628,Licensed,1725 Abbott Rd,Anchorage,AK,US,995073444,907-339-2800,GMT-09:00 America/Anchorage,-149.84,61.14
11966,Starbucks,12449-152385,Safeway - Anchorage #1813,Licensed,1501 Huffman Rd,Anchorage,AK,US,995153596,907-339-1300,GMT-09:00 America/Anchorage,-149.85,61.11
11967,Starbucks,24936-233524,100th & C St - Anchorage,Company Owned,"320 W. 100th Ave, 100, Southgate Shopping Ctr ...",Anchorage,AK,US,99515,(907) 227-9631,GMT-09:00 America/Anchorage,-149.89,61.13
11968,Starbucks,8973-85630,Old Seward & Diamond,Company Owned,1005 E Dimond Blvd,Anchorage,AK,US,995152050,907-344-4160,GMT-09:00 America/Anchorage,-149.86,61.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25567,Starbucks,74385-87621,Safeway-Laramie #2466,Licensed,554 N 3rd St,Laramie,WY,US,820723012,307-721-5107,GMT-07:00 America/Denver,-105.59,41.32
25568,Starbucks,73320-24375,Ridley's - Laramie #1131,Licensed,3112 E. Grand,Laramie,WY,US,820705141,307-742-8146,GMT-07:00 America/Denver,-105.56,41.31
25569,Starbucks,22425-219024,Laramie - Grand & 30th,Company Owned,3021 Grand Ave,Laramie,WY,US,82070,307-742-3262,GMT-07:00 America/Denver,-105.56,41.31
25570,Starbucks,10849-103163,I-80 & Dewar Dr-Rock Springs,Company Owned,118 Westland Way,Rock Springs,WY,US,829015751,307-362-7145,GMT-07:00 America/Denver,-109.25,41.58


In [43]:
sb_zeroed = sb_data.loc[sb_data['Country'] == 'US'].dropna()

In [44]:
splits = [int(''.join(str(i).split())[:5]) for i in sb_zeroed['Postcode']]

In [45]:
sb_zeroed['Zip'] = splits

In [46]:
len(sb_zeroed.loc[sb_zeroed['Zip'] == 0])

0

In [48]:
sb_groupby = sb_zeroed.groupby('Zip')

In [60]:
sb_count = sb_groupby.count()
sb_df = pd.DataFrame(sb_count['Brand']).reset_index()

In [62]:
# Final Starbucks Dependents
sb_df

Unnamed: 0,Zip,Brand
0,1035,1
1,1040,2
2,1056,1
3,1201,1
4,1331,1
...,...,...
5888,99701,2
5889,99705,1
5890,99709,3
5891,99801,2
