In [1]:
import requests
import geopandas as gpd
import pandas as pd

In [17]:
from shapely.geometry import Point

In [51]:
from tqdm.notebook import tqdm

## Open GIS Data

In [None]:
# Shapefile path, relative to the notebook's working directory.
file_path = 'data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp'
# Load the polygons; later cells read the `STUSPS`, `NAME` and `geometry` columns of this frame.
states_df = gpd.read_file(file_path)

## Get Brewery Data

In [2]:
# JSON feed that the following cells download and parse into brewery records.
url = (
    'https://www.craftbeer.com/wp-content/themes/craftbeer2023/breweries/breweries.json'
)

In [3]:
# Download the brewery feed.
# An explicit timeout keeps "Run All" from hanging forever on a stalled connection
# (requests applies no timeout by default).
r = requests.get(url, timeout=60)
# Fail here, with the HTTP status, rather than later with an opaque JSON decode error.
r.raise_for_status()

In [4]:
# Decode the response body as JSON; later cells index the result by position and iterate over it.
breweries_json = r.json()

In [13]:
# Peek at the first record to see which fields each brewery entry carries.
breweries_json[0]

{'attributes': {'type': 'Account',
  'url': '/services/data/v52.0/sobjects/Account/0014x00000x9ty2AAA'},
 'Id': '0014x00000x9ty2AAA',
 'Name': 'Barley POP! Brewing',
 'Parent': None,
 'Phone': '(360) 610-6843',
 'Website': 'https://www.barleypopbeer.com/',
 'Brewery_Type__c': 'Micro',
 'BillingAddress': {'city': 'Snohomish',
  'country': 'United States',
  'countryCode': 'US',
  'geocodeAccuracy': 'Address',
  'latitude': 47.924323,
  'longitude': -122.096792,
  'postalCode': '98290',
  'state': 'Washington',
  'stateCode': 'WA',
  'street': '1208 10th Street'},
 'Is_Craft_Brewery__c': True,
 'Voting_Member__c': True,
 'Signed_Seal_Licensing_agreement__c': True,
 'Membership_Record_Item__c': 'Monthly Brewery Membership',
 'Membership_Record_Paid_Through_Date__c': '2024-12-20',
 'Membership_Record_Status__c': 'Active',
 'Brewery_DB_ID__c': None}

In [66]:
# Count breweries per state, keyed by the two-letter code stored in `STUSPS`.
#
# For each record:
#   * if it has both coordinates, the state is the first polygon in `states_df`
#     that intersects the point; a point that hits no polygon is printed and
#     skipped so it can be resolved by hand;
#   * otherwise the record's own `stateCode` is used; a record with neither is
#     skipped, and its name is printed unless it ends with 'Household'.
coord_list = []  # NOTE(review): never populated in this cell; kept in case another cell reads it
state_counts = {}
for brewery in tqdm(breweries_json, desc='Parsing Breweries'):
    # A null or missing address is treated like an address with no usable fields.
    address = brewery.get('BillingAddress') or {}
    longitude = address.get('longitude')
    latitude = address.get('latitude')

    # Identity checks rather than truthiness: 0.0 is a valid coordinate.
    # Both values are required, since a Point cannot be built from a missing latitude.
    if longitude is not None and latitude is not None:
        point = Point(longitude, latitude)
        matches = states_df.loc[states_df.intersects(point), 'STUSPS']
        if matches.empty:
            # Geocoded, but outside every polygon in `states_df`.
            print(brewery['Name'])
            continue
        state_code = matches.iloc[0]
    else:
        state_code = address.get('stateCode')
        if state_code is None:
            if not brewery['Name'].endswith('Household'):
                print(brewery['Name'])
            continue

    state_counts[state_code] = state_counts.get(state_code, 0) + 1

Parsing Breweries:   0%|          | 0/14542 [00:00<?, ?it/s]

Tower Brewing
Nickel Brook Brewing Co.
Brewpoint
Cowbell Brewing Co.
kroger
Century Barn Brewing and Beverage Company
H2 BrewHouse
RAM Restaurant and Brewery - Tacoma
Del Barril
Otter Creek Brewing
Crafted
Tallgrass Tap House
Keeper’s Cut Meadery
Orpheus Brewing
Horace Grove Brewery
Platform Beer Co
Arcadia Brewing
Ale Asylum
People’s Republic of South Central
Great Frontier Holdings
ENAY
Sacred Vice Brewing - Howard Street
Colsons Beer
Ecliptic Brewing
Isley Brewing
Dillinger
Honky Tonk Brewing Co
Elk Valley Brewing
Astro-Zombie Bio-Labs
Shebeen Brewing Company
Flying Fish
JuneShine
Griffon & Sphynx
Gun Hill Brewing Company
Deep Ellum Brewing Co
Metropolitan Brewing
Blackberry Farm Brewery
Seldom Beer Co.
Bravazzi
Midway 154 Brewing
Edge Brewing
Peace Tree Brewing


### Manually look up the breweries that could not be assigned a state

In [72]:
# Hand-resolved additions: state code -> number of breweries to add to the
# automatic tally for that state.
manually_added = {
    "WA": 1,
    "PR": 1,
    "VT": 1,
    "KS": 1,
    "NC": 2,
    "WI": 1,
    "CA": 2,
    "PA": 1,
    "OR": 1,
    "TN": 2,
    "MD": 1,
    "TX": 1,
    "IL": 2,
    "ID": 1,
}

In [74]:
# Fold the hand-resolved breweries into the automatic tally.
# `.get(state, 0)` lets a state that appears only in the manual list start from
# zero instead of raising KeyError.
# NOTE(review): this cell is not idempotent. Running it twice adds the manual
# counts twice; re-run the cell that rebuilds `state_counts` first.
for state, counts in manually_added.items():
    state_counts[state] = state_counts.get(state, 0) + counts

In [81]:
# One entry per state code; the values are the brewery counts.
breweries_per_state_series = pd.Series(data=state_counts, name="breweries")

In [129]:
# Turn the series into a two-column frame and give the former index the same
# column name (`STUSPS`) that the state table uses, so the two can be merged.
breweries_per_state_df = (
    breweries_per_state_series
    .reset_index()
    .rename(columns={'index': 'STUSPS'})
)

## Get State Population data

In [115]:
# sheet_name=None loads every sheet of the workbook into a dict keyed by sheet name;
# the next cell picks the sheet it needs out of that dict.
state_populations = pd.read_excel(
    'data/NST-EST2023-POP.xlsx',
    sheet_name=None,
    engine='openpyxl',
)

In [118]:
# The sheet has no clean header row, so pandas uses the table caption as the
# name of the first column and 'Unnamed: N' for the others.
# NOTE(review): 'Unnamed: 5' is presumably the most recent estimate column; confirm against the workbook.
area_column = 'table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)'
state_populations_df = (
    state_populations['NST-EST2023-POP'][[area_column, 'Unnamed: 5']]
    .rename(columns={area_column: "NAME", 'Unnamed: 5': 'POPULATION'})
    # Strip only the leading dots that mark sub-part rows. Slicing off the first
    # character unconditionally would also eat the first letter of any label
    # that has no dot, so that row could never match `states_df.NAME`.
    .assign(NAME=lambda frame: frame['NAME'].str.lstrip('.'))
)

In [122]:
# Left-join populations onto the state polygons by name, then keep only the
# columns that the remaining cells use.
states_with_population_df = states_df.merge(
    state_populations_df,
    on='NAME',
    how='left',
)[['STUSPS', 'POPULATION', 'geometry']]

In [125]:
# Drop rows with any missing value, i.e. polygons that found no population in the left join above.
states_with_population_df = states_with_population_df.dropna()

## Format data and calculate breweries per 100k residents

In [132]:
# Attach the brewery counts to the state geometries and populations.
#
# The right-hand table is rebuilt from `breweries_per_state_series` instead of
# being read from `breweries_per_state_df`, because this cell overwrites
# `breweries_per_state_df`. Merging the name with itself would, on a re-run,
# join the already-merged frame and produce suffixed duplicate columns.
#
# how='left' keeps every state that has a population; `breweries` is NaN where
# no brewery was counted for that state code.
breweries_per_state_df = states_with_population_df.merge(
    breweries_per_state_series.reset_index().rename(columns={'index': 'STUSPS'}),
    on='STUSPS',
    how='left',
)

In [143]:
# Breweries per 100,000 residents: count divided by (population / 100000).
breweries_per_state_df['per_100k'] = (
    breweries_per_state_df['breweries']
    .div(breweries_per_state_df['POPULATION'].div(100000))
)

In [148]:
# Persist the result as a GeoPackage.
# The driver is named explicitly so the output format does not depend on
# geopandas inferring it from the '.gpkg' extension.
breweries_per_state_df.to_file('data/breweries.gpkg', driver='GPKG')