In [1]:
import geopandas as gpd
import pandas as pd

## Open State Data

In [2]:
# Load the state shapefile and keep only the geometry plus NAME.
# NAME is title-cased because it is later compared against the state names
# parsed from the energy table and used as the merge key.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(file_path)[["geometry", "NAME"]]
states_gdf = states_gdf.assign(NAME=states_gdf["NAME"].str.title())

## Open Energy Data

In [3]:
# Read the net-generation table; later cells rely on its "description" and "2023" columns.
energy_df = pd.read_csv("data/Net_generation_for_all_sectors.csv")

In [4]:
# Split each description on ":" once and reuse the pieces: the first piece is
# stored as the state name, the second as the fuel label (NaN when a
# description has no second piece).
description_parts = energy_df["description"].str.split(":")
energy_df["STATE"] = description_parts.str[0].str.strip()
energy_df["type_of_energy"] = description_parts.str[1].str.strip()

In [5]:
# List the distinct fuel labels so the mapping below can be written against them.
energy_df["type_of_energy"].unique()

array([nan, 'all fuels (utility-scale)', 'coal', 'petroleum liquids',
       'petroleum coke', 'natural gas', 'other gases', 'nuclear',
       'conventional hydroelectric', 'other renewables', 'wind',
       'all utility-scale solar', 'utility-scale photovoltaic',
       'utility-scale thermal', 'geothermal', 'biomass',
       'wood and wood-derived fuels', 'other biomass',
       'hydro-electric pumped storage', 'other', 'all solar',
       'small-scale solar photovoltaic'], dtype=object)

In [6]:
# Fuel label (as parsed into "type_of_energy") -> short column name used in the
# per-state table. Labels that are commented out, or absent from this dict, are
# not carried into that table. Insertion order here becomes the column order.
ref_dict = {
    "all fuels (utility-scale)": "ALL_FUEL",
    "coal": "COAL",
    "petroleum liquids": "PETRO",
    "petroleum coke": "COKE",
    "natural gas": "GAS",
    "other gases": "GAS_OTHER",
    "nuclear": "NUCLEAR",
    "conventional hydroelectric": "HYDRO",
    # "other renewables" (would be OTHER_RENEWABLES_TOTAL) is left out —
    # presumably an aggregate of the renewable rows below; confirm.
    "wind": "WIND",
    "all utility-scale solar": "ALL_UTILITY_SOLAR",
    # "utility-scale photovoltaic" and "utility-scale thermal" are left out —
    # presumably already counted in "all utility-scale solar"; confirm.
    "geothermal": "GEOTHERMAL",
    "biomass": "BIOMASS",
    # "hydro-electric pumped storage" is left out.
    "other": "OTHER",
    "all solar": "SOLAR",
    "small-scale solar photovoltaic": "SOLAR_OTHER",
}

In [7]:
def format_int(item) -> int:
    """Convert a raw cell value to an int, falling back to 0 when it is not numeric.

    Values accepted by ``int()`` are returned as-is (floats are truncated).
    Decimal strings such as ``"12.5"`` are parsed as floats and truncated the
    same way instead of being zeroed. Anything else -- placeholders such as
    ``"--"``, ``None``, NaN, infinity -- is printed so the substitution stays
    visible, and 0 is returned.

    Args:
        item: The raw value to convert.

    Returns:
        The integer value, or 0 when ``item`` cannot be interpreted as a
        finite number.
    """
    try:
        return int(item)
    except (TypeError, ValueError, OverflowError):
        # Fall through: int() rejects decimal strings that float() can read.
        pass
    try:
        return int(float(item))
    except (TypeError, ValueError, OverflowError):
        # Not a finite number at all; report it and use the 0 fallback.
        print(item)
        return 0

In [None]:
# Build one record per state holding the 2023 value of every fuel in ref_dict.
states_list = []
for state_name in states_gdf["NAME"]:
    state_energy_df = energy_df[energy_df["STATE"] == state_name]
    if state_energy_df.empty:
        # No energy rows carry this name, so the state is left out.
        continue
    state_dict = {}
    for fuel_label, column_name in ref_dict.items():
        fuel_values = state_energy_df.loc[
            state_energy_df["type_of_energy"] == fuel_label, "2023"
        ]
        # A state may have no row for a given fuel; store 0 rather than letting
        # .iloc[0] raise IndexError and abort the whole loop.
        state_dict[column_name] = (
            format_int(fuel_values.iloc[0]) if len(fuel_values) else 0
        )
    # NAME goes last so the fuel columns keep their ref_dict order in the table.
    state_dict["NAME"] = state_name
    states_list.append(state_dict)

Parsing States:   0%|          | 0/56 [00:00<?, ?it/s]

--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--
--


In [9]:
# One row per state: a column for every ref_dict fuel plus NAME.
energy_states_df = pd.DataFrame(states_list)

In [14]:
def calculate_percent(row, column: str = "NUCLEAR") -> float:
    """Return the share of ``column`` in a row's total generation, as a percentage.

    The total is ``ALL_FUEL`` plus ``SOLAR_OTHER``.

    Args:
        row: Mapping-like row (for example a DataFrame row) providing
            ``ALL_FUEL``, ``SOLAR_OTHER`` and ``column`` entries.
        column: Key of the value whose share is computed; defaults to
            ``"NUCLEAR"``.

    Returns:
        The percentage rounded to one decimal place, or NaN when the total
        is 0.
    """
    total = row["ALL_FUEL"] + row["SOLAR_OTHER"]
    if total == 0:
        # The share is undefined without any generation; avoid dividing by zero.
        return float("nan")
    # Scale to percent before rounding: multiplying an already-rounded fraction
    # by 100 can leave binary floating-point residue in the result.
    return round(row[column] / total * 100, 1)

In [15]:
# Row-wise pass: calculate_percent receives each state's row and its result is
# stored in the PERCENT column.
energy_states_df["PERCENT"] = energy_states_df.apply(calculate_percent, axis=1)

## Merge Data

In [16]:
# Inner join on NAME: only states present in both tables are kept, each with
# its geometry and its energy columns.
energy_per_state_gdf = states_gdf.merge(energy_states_df, on="NAME", how="inner")

In [17]:
# Display the states ordered from the highest PERCENT to the lowest.
energy_per_state_gdf.sort_values("PERCENT", ascending=False)

Unnamed: 0,geometry,NAME,ALL_FUEL,COAL,PETRO,COKE,GAS,GAS_OTHER,NUCLEAR,HYDRO,WIND,ALL_UTILITY_SOLAR,GEOTHERMAL,BIOMASS,OTHER,SOLAR,SOLAR_OTHER,MOST_ENERGY,PERCENT
33,"MULTIPOLYGON (((-70.61702 42.97718, -70.61529 ...",New Hampshire,16825,160,68,0,4226,0,9535,1592,411,3,0,779,51,302,299,NUCLEAR,55.7
32,"MULTIPOLYGON (((-79.50795 33.02008, -79.50713 ...",South Carolina,100853,15150,96,0,23691,0,55622,2199,0,2762,0,1741,44,3351,589,NUCLEAR,54.8
28,"POLYGON ((-91.51297 40.18106, -91.51107 40.188...",Illinois,177738,27150,26,0,28445,248,97559,92,21808,1911,0,242,256,3447,1536,NUCLEAR,54.4
22,"POLYGON ((-90.3103 35.0043, -90.30988 35.00975...",Tennessee,77791,15793,102,0,15030,11,37937,8042,16,961,0,452,0,1034,72,NUCLEAR,48.7
36,"POLYGON ((-75.5591 39.62906, -75.55945 39.6298...",New Jersey,64229,0,42,0,33056,161,28335,12,18,1544,0,671,554,4710,3166,GAS,42.0
37,"MULTIPOLYGON (((-76.05015 37.9869, -76.04998 3...",Maryland,36001,1709,64,0,15336,0,14984,1849,482,943,0,322,312,2325,1382,GAS,40.1
1,"MULTIPOLYGON (((-75.72681 35.93584, -75.71827 ...",North Carolina,126553,14205,84,0,51788,0,42336,4225,529,11459,0,1692,235,12216,757,GAS,33.3
16,"MULTIPOLYGON (((-88.05338 30.50699, -88.05109 ...",Alabama,139435,19182,25,0,61848,0,45579,8430,0,1201,0,3171,0,1223,22,GAS,32.7
50,"MULTIPOLYGON (((-72.76143 41.24233, -72.75973 ...",Connecticut,40666,0,124,0,24952,0,13669,465,10,421,0,591,429,1659,1238,GAS,32.6
3,"MULTIPOLYGON (((-75.74241 37.80835, -75.74151 ...",Virginia,91059,1415,135,0,50605,0,29663,1391,47,5421,0,3361,477,6129,708,GAS,32.3


In [18]:
# Reproject before export.
# NOTE(review): 9311 is passed as the target CRS — presumably the EPSG code of
# the US National Atlas Equal Area projection; confirm.
energy_per_state_gdf = energy_per_state_gdf.to_crs(9311)
# Write the merged layer to disk; the .gpkg extension suggests GeoPackage output.
energy_per_state_gdf.to_file("data/energy.gpkg")