In [1]:
import geopandas as gpd
import pandas as pd

## Open State Data

In [2]:
# Read the state boundary shapefile, title-case the NAME column, and keep only
# the geometry and the name (the name is the join key used later on).
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(file_path)
states_gdf = states_gdf.assign(NAME=states_gdf["NAME"].str.title())[
    ["geometry", "NAME"]
]

## Open Energy Data

In [3]:
# Load the net-generation table. Later cells read its "description" column
# and its "2023" column.
energy_df = pd.read_csv("data/Net_generation_for_all_sectors.csv")

In [4]:
# Split each description on ":" once; the first piece becomes STATE and the
# second piece becomes type_of_energy (both stripped of surrounding spaces).
description_parts = energy_df["description"].str.split(":")
energy_df["STATE"] = description_parts.str[0].str.strip()
energy_df["type_of_energy"] = description_parts.str[1].str.strip()

In [5]:
# Show the distinct energy-type labels; the keys of ref_dict are taken from
# this list. NaN appears for rows that yield no second piece after the split.
energy_df["type_of_energy"].unique()

array([nan, 'all fuels (utility-scale)', 'coal', 'petroleum liquids',
       'petroleum coke', 'natural gas', 'other gases', 'nuclear',
       'conventional hydroelectric', 'other renewables', 'wind',
       'all utility-scale solar', 'utility-scale photovoltaic',
       'utility-scale thermal', 'geothermal', 'biomass',
       'wood and wood-derived fuels', 'other biomass',
       'hydro-electric pumped storage', 'other', 'all solar',
       'small-scale solar photovoltaic'], dtype=object)

In [6]:
# Maps a "type_of_energy" label to the short column name it gets in the
# per-state table. Labels that are commented out here, or that do not appear
# at all (e.g. 'wood and wood-derived fuels', 'other biomass'), are not
# carried into that table.
ref_dict = {
    "all fuels (utility-scale)": "ALL_FUEL",
    "coal": "COAL",
    "petroleum liquids": "PETRO",
    "petroleum coke": "COKE",
    "natural gas": "GAS",
    "other gases": "GAS_OTHER",
    "nuclear": "NUCLEAR",
    "conventional hydroelectric": "HYDRO",
    #'other renewables': "OTHER_RENEWABLES_TOTAL",
    "wind": "WIND",
    "all utility-scale solar": "ALL_UTILITY_SOLAR",
    #'utility-scale photovoltaic',
    #'utility-scale thermal',
    "geothermal": "GEOTHERMAL",
    "biomass": "BIOMASS",
    #'hydro-electric pumped storage',
    "other": "OTHER",
    "all solar": "SOLAR",
    "small-scale solar photovoltaic": "SOLAR_OTHER",
}

In [7]:
def format_int(item) -> int:
    try:
        return int(item)
    except ValueError:
        print(item)
        return 0

In [9]:
# Build one record per state: the 2023 value of every energy type listed in
# ref_dict, stored under its short column name, plus the state NAME.
states_list = []
for state_name in states_gdf["NAME"]:
    state_energy_df = energy_df[energy_df["STATE"] == state_name]
    if state_energy_df.empty:
        # No energy rows carry this name, so no record is produced for it.
        continue
    state_dict = {}
    for energy_type, column_name in ref_dict.items():
        stats = state_energy_df.loc[
            state_energy_df["type_of_energy"] == energy_type, "2023"
        ]
        # A state may have no row for a given energy type; record 0 rather
        # than failing on .iloc[0] of an empty selection.
        state_dict[column_name] = format_int(stats.iloc[0]) if len(stats) else 0
    state_dict["NAME"] = state_name
    states_list.append(state_dict)

--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--


In [22]:
# Turn the per-state records into a table: one row per record, one column per
# key (the ref_dict short names plus NAME).
energy_states_df = pd.DataFrame(states_list)

In [15]:
def calculate_percent(row, column: str = "SOLAR") -> float:
    """Return one column's share of total generation, as a percentage.

    The total is ``ALL_FUEL + SOLAR_OTHER``.

    Args:
        row: A mapping-like row (e.g. a DataFrame row) providing
            ``ALL_FUEL``, ``SOLAR_OTHER`` and ``column``.
        column: Name of the value to express as a share. Defaults to
            ``"SOLAR"``.

    Returns:
        The percentage rounded to one decimal place, or NaN when the total
        is zero (the share is undefined in that case).
    """
    total = row["ALL_FUEL"] + row["SOLAR_OTHER"]
    if total == 0:
        return float("nan")
    # Scale first and round last: rounding the ratio and then multiplying by
    # 100 reintroduces float noise (e.g. 7.000000000000001 instead of 7.0).
    return float(round(100 * row[column] / total, 1))

In [16]:
# Apply calculate_percent to every row and store the result as PERCENT.
energy_states_df["PERCENT"] = energy_states_df.apply(calculate_percent, axis=1)

In [24]:
def calculate_biomass(row) -> float:
    """Return the biomass share of total generation, as a percentage.

    The total is ``ALL_FUEL + SOLAR_OTHER``.

    Args:
        row: A mapping-like row (e.g. a DataFrame row) providing
            ``BIOMASS``, ``ALL_FUEL`` and ``SOLAR_OTHER``.

    Returns:
        The percentage rounded to one decimal place, or NaN when the total
        is zero (the share is undefined in that case).
    """
    total = row["ALL_FUEL"] + row["SOLAR_OTHER"]
    if total == 0:
        return float("nan")
    # Scale first and round last: rounding the ratio and then multiplying by
    # 100 reintroduces float noise (e.g. 7.000000000000001 instead of 7.0).
    return float(round(100 * row["BIOMASS"] / total, 1))

In [25]:
# Apply calculate_biomass to every row and store the result as PERCENT_BIOMASS.
energy_states_df["PERCENT_BIOMASS"] = energy_states_df.apply(calculate_biomass, axis=1)

## Merge Data

In [19]:
# Attach the energy columns to the state geometries. The inner join on NAME
# keeps only the names present in both tables.
energy_per_state_gdf = states_gdf.merge(energy_states_df, on="NAME", how="inner")

In [20]:
# Reproject to CRS 9311 (presumably the EPSG code - confirm it is the intended
# projection), then write the result to data/energy.gpkg.
energy_per_state_gdf = energy_per_state_gdf.to_crs(9311)
energy_per_state_gdf.to_file("data/energy.gpkg")