In [5]:
import geopandas as gpd
import pandas as pd

## Open State data

In [6]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(file_path)
states_gdf["NAME"] = states_gdf["NAME"].str.title()
states_gdf = states_gdf[["geometry", "NAME"]]

## Open Energy Data

In [7]:
state_total_energy_record_names = [
    f"{state_name} : all fuels (utility-scale)"
    for state_name in list(states_gdf["NAME"])
]

In [8]:
state_wind_energy_record_names = [
    f"{state_name.title()} : wind" for state_name in list(states_gdf["NAME"])
]

In [9]:
state_solar_energy_record_names = [
    f"{state_name.title()} : all solar" for state_name in list(states_gdf["NAME"])
]

In [10]:
energy_df = pd.read_csv("data/Net_generation_for_all_sectors.csv")

In [16]:
def format_wind_col(row) -> int:
    wind = row["2023"]
    if wind.isnumeric():
        return int(wind)
    else:
        print(row["description"], row["2023"])
        return 0

In [17]:
def format_solar_col(row) -> int:
    solar = row["2023"]
    if solar.isnumeric():
        return int(solar)
    else:
        print(row["description"], row["2023"])
        return 0

In [18]:
solar_df = energy_df[energy_df["description"].isin(state_solar_energy_record_names)]
solar_df["NAME"] = solar_df["description"].str.split(":").str[0]
solar_df["SOLAR"] = solar_df.apply(lambda row: format_solar_col(row), axis=1)
solar_df = solar_df[["NAME", "SOLAR"]].reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_df["NAME"] = solar_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_df["SOLAR"] = solar_df.apply(lambda row: format_solar_col(row), axis=1)


In [13]:
total_df = energy_df[energy_df["description"].isin(state_total_energy_record_names)]
total_df["NAME"] = total_df["description"].str.split(":").str[0]
total_df["TOTAL"] = total_df["2023"].astype(int)
total_df = total_df[["NAME", "TOTAL"]].reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_df["NAME"] = total_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_df["TOTAL"] = total_df["2023"].astype(int)


In [24]:
wind_df = energy_df[energy_df["description"].isin(state_wind_energy_record_names)]
wind_df["NAME"] = wind_df["description"].str.split(":").str[0]
wind_df["WIND"] = wind_df.apply(lambda row: format_wind_col(row), axis=1)
wind_df = wind_df[["NAME", "WIND"]].reset_index(drop=True)

District Of Columbia : wind --
Florida : wind --
Georgia : wind --
South Carolina : wind --
Alabama : wind --
Kentucky : wind --
Mississippi : wind --
Arkansas : wind --
Louisiana : wind --


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wind_df["NAME"] = wind_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wind_df["WIND"] = wind_df.apply(lambda row: format_wind_col(row), axis=1)


In [28]:
total_with_solar_df = total_df.merge(solar_df, on="NAME", how="inner")

In [29]:
total_with_solar_df["ALL"] = total_with_solar_df["TOTAL"] + total_with_solar_df["SOLAR"]

In [30]:
formatted_energy_df = wind_df.merge(total_with_solar_df, how="inner", on="NAME")

In [32]:
formatted_energy_df["percent"] = (
    formatted_energy_df["WIND"] / formatted_energy_df["ALL"]
).round(decimals=3) * 100

## Merge Data

In [33]:
formatted_energy_df["NAME"] = formatted_energy_df["NAME"].str.strip()
states_gdf["NAME"] = states_gdf["NAME"].str.strip()

In [34]:
energy_per_state_gdf = states_gdf.merge(formatted_energy_df, on="NAME", how="inner")

In [37]:
energy_per_state_gdf.sort_values("percent", ascending=False)

Unnamed: 0,geometry,NAME,WIND,TOTAL,SOLAR,ALL,percent
31,"POLYGON ((-96.6397 42.73707, -96.63589 42.741,...",Iowa,41439,69837,911,70748,58.6
13,"POLYGON ((-104.05788 44.9976, -104.05078 44.99...",South Dakota,9376,17436,50,17486,53.6
25,"POLYGON ((-102.05174 40.00308, -101.9167 40.00...",Kansas,27273,58457,210,58667,46.5
2,"POLYGON ((-103.00256 36.52659, -103.00219 36.6...",Oklahoma,37012,89236,242,89478,41.4
12,"POLYGON ((-109.05017 31.48, -109.04984 31.4995...",New Mexico,14914,39269,3314,42583,35.0
46,"POLYGON ((-104.04868 48.86378, -104.04865 48.8...",North Dakota,14477,42069,2,42071,34.4
10,"POLYGON ((-104.05342 41.17054, -104.05324 41.1...",Nebraska,11845,39446,129,39575,29.9
20,"POLYGON ((-109.06025 38.59933, -109.05954 38.7...",Colorado,16091,57542,5319,62861,25.6
49,"MULTIPOLYGON (((-89.59206 47.96668, -89.59147 ...",Minnesota,14398,57277,2417,59694,24.1
14,"MULTIPOLYGON (((-94.7183 29.72886, -94.71721 2...",Texas,119855,547295,32325,579620,20.7


In [38]:
energy_per_state_gdf = energy_per_state_gdf.to_crs(9311)
energy_per_state_gdf.to_file("data/energy.gpkg")