In [75]:
import geopandas as gpd
import pandas as pd

## Open State data

In [76]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(file_path)
states_gdf["NAME"] = states_gdf["NAME"].str.title()
states_gdf = states_gdf[["geometry", "NAME"]]

## Open Energy Data

In [77]:
state_total_energy_record_names = [
    f"{state_name} : all fuels (utility-scale)"
    for state_name in list(states_gdf["NAME"])
]

In [78]:
state_solar_energy_record_names = [
    f"{state_name.title()} : all solar" for state_name in list(states_gdf["NAME"])
]

In [79]:
energy_df = pd.read_csv("data/Net_generation_for_all_sectors.csv")

In [80]:
def format_solar_col(row) -> int:
    solar = row["2023"]
    if solar.isnumeric():
        return int(solar)
    else:
        print(row["description"], row["2023"])
        return 0

In [81]:
solar_df = energy_df[energy_df["description"].isin(state_solar_energy_record_names)]
solar_df["NAME"] = solar_df["description"].str.split(":").str[0]
solar_df["SOLAR"] = solar_df.apply(lambda row: format_solar_col(row), axis=1)
solar_df = solar_df[["NAME", "SOLAR"]].reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_df["NAME"] = solar_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_df["SOLAR"] = solar_df.apply(lambda row: format_solar_col(row), axis=1)


In [82]:
total_df = energy_df[energy_df["description"].isin(state_total_energy_record_names)]
total_df["NAME"] = total_df["description"].str.split(":").str[0]
total_df["TOTAL"] = total_df["2023"].astype(int)
total_df = total_df[["NAME", "TOTAL"]].reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_df["NAME"] = total_df["description"].str.split(":").str[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_df["TOTAL"] = total_df["2023"].astype(int)


In [83]:
formatted_energy_df = solar_df.merge(total_df, how="inner", on="NAME")

In [86]:
formatted_energy_df["percent"] = (
    formatted_energy_df["SOLAR"]
    / (formatted_energy_df["SOLAR"] + formatted_energy_df["TOTAL"])
).round(decimals=3) * 100

## Merge Data

In [90]:
formatted_energy_df["NAME"] = formatted_energy_df["NAME"].str.strip()
states_gdf["NAME"] = states_gdf["NAME"].str.strip()

In [91]:
energy_per_state_gdf = states_gdf.merge(formatted_energy_df, on="NAME", how="inner")

In [92]:
energy_per_state_gdf

Unnamed: 0,geometry,NAME,SOLAR,TOTAL,percent
0,"MULTIPOLYGON (((-88.50297 30.21524, -88.49176 ...",Mississippi,656,72933,0.9
1,"MULTIPOLYGON (((-75.72681 35.93584, -75.71827 ...",North Carolina,12216,126553,8.8
2,"POLYGON ((-103.00256 36.52659, -103.00219 36.6...",Oklahoma,242,89236,0.3
3,"MULTIPOLYGON (((-75.74241 37.80835, -75.74151 ...",Virginia,6129,91059,6.3
4,"POLYGON ((-82.6432 38.16909, -82.643 38.16956,...",West Virginia,47,52287,0.1
5,"MULTIPOLYGON (((-88.8677 29.86155, -88.86566 2...",Louisiana,592,97785,0.6
6,"MULTIPOLYGON (((-83.19159 42.03537, -83.18993 ...",Michigan,1566,120657,1.3
7,"MULTIPOLYGON (((-70.23405 41.28565, -70.22361 ...",Massachusetts,5724,19696,22.5
8,"POLYGON ((-117.24267 44.39655, -117.23484 44.3...",Idaho,1092,17842,5.8
9,"MULTIPOLYGON (((-80.17628 25.52505, -80.17395 ...",Florida,17804,259798,6.4


In [93]:
energy_per_state_gdf = energy_per_state_gdf.to_crs(9311)
energy_per_state_gdf.to_file("data/energy.gpkg")