In [110]:
import json

In [111]:
import requests
import geopandas as gpd

## Open GIS Data

In [112]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

## Get Population Data

In [113]:
state_populations = pd.read_excel(
    "data/NST-EST2023-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [114]:
state_populations_df = state_populations["NST-EST2023-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [115]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

## Get Winery Data

In [116]:
r = requests.get("https://winemaps.com/api/wine-maps-winery")

In [117]:
feature_collection = r.json()
for i, feature in enumerate(feature_collection["features"]):
    coords = feature["geometry"]["coordinates"]
    try:
        feature_collection["features"][i]["geometry"]["coordinates"] = [
            float(coords[0]),
            float(coords[1]),
        ]
    except ValueError:
        print(feature["properties"]["title"], feature["properties"]["address"])

Marsden Estate  56 Wiroa Road Keri Keri
Barkers Marque Wines 499 Reserve Road, RD 1, Seddon
Beach House Wines A. 93 Mere Road Hastings Hawkes Bay
I. Brajkovich 76 Guys Road, Kaikohe
Matahiwi Vineyard 286 Paierau Road RD 1, Masteron Wairarapa
Misconduct Wine Co. 375 Upper Branch Road North Penticton
Naked Range Wines 125 Rifle Range Road Smiths Gully
Hannay Wines Valley Green, 41 Viljoenshoop Road Elgin
Catherine Marshall Lavinia, 1377 Polkadraai Road (M12/Stellenbosch Arterial) Stellenbosch
South Hill Vineyards 113 Elgin Valley Road Elgin
Boekenhoutskloof Winery Excelsior Road Franschhoek
Dieu Donne Vineyards        Dieu Donne Uitkyk Street Franschhoek
La Bri La Bri Wine Estate, Excelsior Road Franschhoek
Mont Rochelle 1499 Dassenberg Road Franschhoek
Veraison Vineyard Dassenberg Road Franschhoek
Neyen Camino Apalta Km. 11 Colchagua
Quinta da Aveleira National Road, Tavora, 7 Tabuaço
Tarapacá Fundo El Rosario de Naltahua S/N 
Gisborne Wine Centre Shed 3, 50 The Esplanade Inner Harbour 

In [118]:
with open("data/wineries.geojson", "w") as f:
    f.write(json.dumps(feature_collection))

In [119]:
wineries_gdf = gpd.GeoDataFrame.from_file("data/wineries.geojson")

In [120]:
states_df[wineries_gdf["geometry"].iloc[0].intersects(states_df["geometry"])]["STUSPS"]

16    CA
Name: STUSPS, dtype: object

In [144]:
wineries_gdf = wineries_gdf[wineries_gdf["geometry"] != None]

In [145]:
def calculate_state(input_point, input_polygons):
    output_df = input_polygons[input_point.intersects(input_polygons["geometry"])][
        "STUSPS"
    ]
    if output_df.empty:
        return
    return output_df.iloc[0]

In [200]:
wineries_gdf["state_code"] = wineries_gdf["geometry"].apply(
    lambda geometry: calculate_state(geometry, states_df)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [201]:
wineries_by_state_series = wineries_gdf.groupby(["state_code"]).size()

In [202]:
wineries_by_state_df = pd.DataFrame(
    {
        "STUSPS": wineries_by_state_series.index,
        "wineries": wineries_by_state_series.values,
    }
)

## Merge Data

In [203]:
wineries_per_state_df = states_with_population_df.merge(
    wineries_by_state_df, on="STUSPS", how="left"
)

In [204]:
wineries_per_state_df = wineries_per_state_df.fillna(0)

In [205]:
wineries_per_state_df["per_100k"] = wineries_per_state_df["wineries"] / (
    wineries_per_state_df["POPULATION"] / 100000
)
wineries_per_state_df["per_500k"] = wineries_per_state_df["wineries"] / (
    wineries_per_state_df["POPULATION"] / 500_000
)
wineries_per_state_df["per_1m"] = wineries_per_state_df["wineries"] / (
    wineries_per_state_df["POPULATION"] / 1_000_000
)

In [206]:
wineries_per_state_df.to_file("data/wineries.gpkg")