In [2]:
import geopandas as gpd
import pandas as pd

## Open State data

In [3]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(file_path)
states_gdf["NAME"] = states_gdf["NAME"].str.title()
states_gdf = states_gdf[["geometry", "NAME"]]

## Open Energy Data

In [4]:
state_total_energy_record_names = [
    f"{state_name} : all fuels (utility-scale)"
    for state_name in list(states_gdf["NAME"])
]

In [5]:
state_solar_energy_record_names = [
    f"{state_name.title()} : all solar" for state_name in list(states_gdf["NAME"])
]

In [6]:
state_nuclear_energy_record_names = [
    f"{state_name.title()} : nuclear" for state_name in list(states_gdf["NAME"])
]

In [7]:
energy_df = pd.read_csv("data/Net_generation_for_all_sectors.csv")

In [8]:
def format_nuclear_col(row) -> int:
    nuclear = row["2023"]
    if nuclear.isnumeric():
        return int(nuclear)
    else:
        print(row["description"], row["2023"])
        return 0

In [9]:
nuclear_df = energy_df[energy_df["description"].isin(state_nuclear_energy_record_names)]
nuclear_df["NAME"] = nuclear_df["description"].str.split(":").str[0]
nuclear_df["NUCLEAR"] = nuclear_df.apply(lambda row: format_nuclear_col(row), axis=1)
nuclear_df = nuclear_df[["NAME", "NUCLEAR"]].reset_index(drop=True)

Maine : nuclear --
Massachusetts : nuclear --
Rhode Island : nuclear --
Vermont : nuclear --
Indiana : nuclear --
Iowa : nuclear --
North Dakota : nuclear --
South Dakota : nuclear --
Delaware : nuclear --
District Of Columbia : nuclear --
West Virginia : nuclear --
Kentucky : nuclear --
Oklahoma : nuclear --
Colorado : nuclear --
Idaho : nuclear --
Montana : nuclear --
Nevada : nuclear --
New Mexico : nuclear --
Utah : nuclear --
Wyoming : nuclear --
Oregon : nuclear --
Alaska : nuclear --
Hawaii : nuclear --


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nuclear_df["NAME"] = nuclear_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nuclear_df["NUCLEAR"] = nuclear_df.apply(lambda row: format_nuclear_col(row), axis=1)


In [11]:
solar_df = energy_df[energy_df["description"].isin(state_solar_energy_record_names)]
solar_df["NAME"] = solar_df["description"].str.split(":").str[0]
solar_df["SOLAR"] = solar_df["2023"].astype(int)
solar_df = solar_df[["NAME", "SOLAR"]].reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_df["NAME"] = solar_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_df["SOLAR"] = solar_df["2023"].astype(int)


In [12]:
total_df = energy_df[energy_df["description"].isin(state_total_energy_record_names)]
total_df["NAME"] = total_df["description"].str.split(":").str[0]
total_df["TOTAL"] = total_df["2023"].astype(int)
total_df = total_df[["NAME", "TOTAL"]].reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_df["NAME"] = total_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_df["TOTAL"] = total_df["2023"].astype(int)


In [13]:
total_with_solar_df = total_df.merge(solar_df, on="NAME", how="inner")

In [14]:
total_with_solar_df["ALL"] = total_with_solar_df["TOTAL"] + total_with_solar_df["SOLAR"]

In [15]:
formatted_energy_df = nuclear_df.merge(total_with_solar_df, how="inner", on="NAME")

In [17]:
formatted_energy_df["percent"] = (
    formatted_energy_df["NUCLEAR"] / formatted_energy_df["ALL"]
).round(decimals=3) * 100

## Merge Data

In [25]:
formatted_energy_df["NAME"] = formatted_energy_df["NAME"].str.strip()
states_gdf["NAME"] = states_gdf["NAME"].str.strip()

In [26]:
energy_per_state_gdf = states_gdf.merge(formatted_energy_df, on="NAME", how="inner")

In [27]:
energy_per_state_gdf = energy_per_state_gdf.to_crs(9311)
energy_per_state_gdf.to_file("data/energy.gpkg")