In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

In [3]:
aqi_data_csv = "./datasets/updated_aqi_data_with_lat_lon.csv"

In [7]:
# Load the CSV file into a pandas dataframe
aqi_df = pd.read_csv(aqi_data_csv)
aqi_df.head()

Unnamed: 0,State,County,Year,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous_Days,Max_AQI,Ninety_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10,latitude,longitude
0,Alabama,Baldwin,2024,111,98,13,0,0,0,0,66,51,33,0,0,23,88,0,30.567753,-87.732439
1,Alabama,Clay,2024,121,96,25,0,0,0,0,75,55,28,0,0,0,121,0,33.242339,-85.819651
2,Alabama,DeKalb,2024,152,120,32,0,0,0,0,77,58,42,0,0,116,36,0,34.452435,-85.766212
3,Alabama,Elmore,2024,28,28,0,0,0,0,0,50,47,41,0,0,28,0,0,32.580123,-86.125195
4,Alabama,Etowah,2024,120,74,46,0,0,0,0,93,66,42,0,0,14,106,0,34.03714,-86.022147


In [13]:
# Create a geometry column from the latitude and longitude columns
geometry = [Point(xy) for xy in zip(aqi_df['longitude'], aqi_df['latitude'])]

# Convert the pandas dataframe into a GeoDataFrame
gdf = gpd.GeoDataFrame(aqi_df, geometry=geometry)

In [15]:
# Set the coordinate reference system to WGS84 (EPSG:4326)
gdf.set_crs(epsg=4326, inplace=True)

Unnamed: 0,State,County,Year,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous_Days,...,Ninety_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10,latitude,longitude,geometry
0,Alabama,Baldwin,2024,111,98,13,0,0,0,0,...,51,33,0,0,23,88,0,30.567753,-87.732439,POINT (-87.73244 30.56775)
1,Alabama,Clay,2024,121,96,25,0,0,0,0,...,55,28,0,0,0,121,0,33.242339,-85.819651,POINT (-85.81965 33.24234)
2,Alabama,DeKalb,2024,152,120,32,0,0,0,0,...,58,42,0,0,116,36,0,34.452435,-85.766212,POINT (-85.76621 34.45243)
3,Alabama,Elmore,2024,28,28,0,0,0,0,0,...,47,41,0,0,28,0,0,32.580123,-86.125195,POINT (-86.12520 32.58012)
4,Alabama,Etowah,2024,120,74,46,0,0,0,0,...,66,42,0,0,14,106,0,34.037140,-86.022147,POINT (-86.02215 34.03714)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,Wyoming,Sublette,2024,151,116,34,1,0,0,0,...,67,46,0,0,149,2,0,42.736927,-109.990591,POINT (-109.99059 42.73693)
939,Wyoming,Sweetwater,2024,176,173,2,1,0,0,0,...,44,32,0,1,85,87,3,41.623947,-108.970231,POINT (-108.97023 41.62395)
940,Wyoming,Teton,2024,182,154,28,0,0,0,0,...,58,43,0,0,145,27,10,43.913921,-110.638036,POINT (-110.63804 43.91392)
941,Wyoming,Uinta,2024,91,91,0,0,0,0,0,...,6,4,0,0,0,0,91,41.263185,-110.567852,POINT (-110.56785 41.26319)


In [17]:
# Export the GeoDataFrame to a GeoJSON file
gdf.to_file("output.geojson", driver="GeoJSON")