<a href="https://colab.research.google.com/github/yox1000/ev_data_analysis/blob/main/evanalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab shell escape: install pandas, geopandas, matplotlib and folium before the import cell runs.
!pip install pandas geopandas matplotlib folium

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import HeatMap
from shapely.geometry import Point
import matplotlib.pyplot as plt
from shapely.ops import unary_union
from itertools import count



In [None]:
# Colab-only: prompt for a file upload and keep the result in `uploaded`.
# Presumably this is the station CSV that the next cell reads — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
# Load the raw alternative-fuel station export into the working frame `df`.
df = pd.read_csv(filepath_or_buffer="alt_fuel_stations (Jul 29 2021).csv")


In [None]:
# Preview the first rows of the raw station table.
df.head()

In [None]:
# Print the per-column summary, then show the column index as the cell's output.
df.info()
df.columns

In [None]:
# Assemble the labels to discard. The slices are resolved against the raw frame's
# column order, so the list may contain repeats; only labels that actually exist
# in `df` are passed to drop().
cols_to_drop = (
    ["Station Phone", "Expected Date"]
    # label slice: "EV Level1 EVSE Num" through "Geocode Status", both ends included
    + list(df.loc[:, "EV Level1 EVSE Num":"Geocode Status"].columns)
    + ["Date Last Confirmed", "Intersection Directions"]
    # "Updated At" and every column to its right
    + list(df.loc[:, "Updated At":].columns)
    # positional slice: columns 10-14 (the original note names these as
    # Access Days Time -> NG PSI; verify against the CSV header)
    + list(df.columns[10:15])
    # extra columns named explicitly
    + ["Plus4", "Status Code", "NG Fill Type Code", "NG PSI"]
)

known_columns = set(df.columns)
df_clean = df.drop(columns=[label for label in cols_to_drop if label in known_columns])
df_clean.info()


In [None]:
# Inspect the stations that have no street address.
# Only the last expression of a cell is rendered, so a bare count placed mid-cell
# is silently discarded; print it explicitly instead.
missing_addr = df_clean[df_clean["Street Address"].isna()]
print(f"Stations missing a street address: {len(missing_addr)}")
missing_addr.head()

**Fill in missing addresses**

In [None]:
# Manually fill in the street address of two stations, selected by station ID.
df_clean.loc[df_clean["ID"] == 82475, "Street Address"] = "Army Base, 1 Fort Shafter Bldg 434, Fort Shafter, HI 96858"
df_clean.loc[df_clean["ID"] == 169548, "Street Address"] = "141 Sutton Steam Plant Rd, Wilmington, NC 28401"

# Re-check for remaining gaps. A mid-cell `.head()` is never rendered (only the
# cell's last expression is), so report the remaining count with print().
missing_addr = df_clean[df_clean["Street Address"].isna()]
print(f"Stations still missing a street address: {len(missing_addr)}")

# Show the two patched rows as the cell's output.
df_clean.loc[df_clean["ID"].isin([82475, 169548]), ["Station Name", "Street Address", "City", "State", "ZIP"]]

In [None]:
# Keep only the rows where both coordinates are present (a row is dropped if either is missing).
df_map = df_clean.dropna(axis="index", how="any", subset=["Latitude", "Longitude"])

**Clean map data (correct station coordinates)**

In [None]:
# Find stations whose address mentions "Maxwell Dr" or "Keystone Park Rd".
# The match is case-insensitive, and rows without an address count as non-matches.
address_pattern = "Maxwell Dr|Keystone Park Rd"
mask = df_clean["Street Address"].str.contains(address_pattern, case=False, na=False)

preview_columns = ["ID", "Station Name", "Street Address", "City", "State", "ZIP"]
df_clean.loc[mask, preview_columns]


In [None]:
# Update ID 175080 → Maxwell Dr (Clifton Park, NY)
# Manual coordinate overrides: station ID -> (latitude, longitude).
coordinate_fixes = {
    175080: (42.86764998844894, -73.78219169844964),  # Maxwell Dr (Clifton Park, NY)
    191120: (40.37484274267125, -79.38030680031966),  # Keystone Park (Derry, PA)
}
for station_id, (fixed_lat, fixed_lon) in coordinate_fixes.items():
    df_clean.loc[df_clean["ID"] == station_id, ["Latitude", "Longitude"]] = [fixed_lat, fixed_lon]

# Verify the fixes by displaying the two updated rows.
verify_columns = ["ID", "Station Name", "Street Address", "City", "State", "Latitude", "Longitude"]
df_clean.loc[df_clean["ID"].isin(list(coordinate_fixes)), verify_columns]



In [None]:
from folium.plugins import MarkerCluster

# Clustered marker map of every station in df_clean.
# folium rejects NaN coordinates, so only rows with both Latitude and Longitude
# are plotted. The filter is computed here so it reflects the current df_clean.
stations_with_coords = df_clean.dropna(subset=["Latitude", "Longitude"])

m = folium.Map(
    # centre on the mean station position
    location=[stations_with_coords["Latitude"].mean(), stations_with_coords["Longitude"].mean()],
    zoom_start=4,
    tiles="CartoDB positron"  # clean basemap
)

marker_cluster = MarkerCluster().add_to(m)

# Iterate plain tuples of just the needed columns; this avoids building a
# Series per row the way iterrows() does.
popup_columns = ["Station Name", "Street Address", "City", "State", "ZIP", "Latitude", "Longitude"]
for name, street, city, state, zip_code, lat, lon in stations_with_coords[popup_columns].itertuples(index=False, name=None):
    popup_text = f"""
    <b>{name}</b><br>
    {street}<br>
    {city}, {state} {zip_code}
    """
    folium.Marker(
        location=[lat, lon],
        popup=popup_text
    ).add_to(marker_cluster)


m

**Get only EV charging stations**

In [None]:
# Select the rows whose fuel type code is "ELEC" and build a point GeoDataFrame
# from their longitude/latitude, tagged as EPSG:4326.
is_electric = df_clean["Fuel Type Code"].eq("ELEC")
ev_only = df_clean.loc[is_electric].copy()

ev_points = gpd.points_from_xy(ev_only["Longitude"], ev_only["Latitude"])
ev_gdf = gpd.GeoDataFrame(ev_only, geometry=ev_points, crs="EPSG:4326")


In [None]:
# Second Colab upload prompt; this rebinds `uploaded` from the earlier upload cell.
# Presumably for tl_2019_us_county.zip, which the next cell extracts — confirm.
from google.colab import files
uploaded = files.upload()


In [None]:
import zipfile

# Unpack the uploaded county archive into the county_shapefile/ directory.
with zipfile.ZipFile("tl_2019_us_county.zip", mode="r") as county_archive:
    county_archive.extractall(path="county_shapefile")


In [None]:
import geopandas as gpd

# Read the extracted county shapefile, then reproject it to EPSG:4326
# (the same CRS assigned to the station points).
counties = gpd.read_file("county_shapefile/tl_2019_us_county.shp")
counties = counties.to_crs("EPSG:4326")
counties.head()


In [None]:
# Attach to every station point the county polygon it lies within.
joined = ev_gdf.sjoin(counties, how="inner", predicate="within")

# Count stations per county GEOID.
county_counts = (
    joined
    .groupby("GEOID")
    .size()
    .reset_index(name="ev_station_count")
)

# Left-merge onto all counties; counties with no matched station get a count of 0.
counties_ev = counties.merge(county_counts, on="GEOID", how="left")
counties_ev = counties_ev.fillna({"ev_station_count": 0})

# Quick static choropleth of the raw counts.
counties_ev.plot(column="ev_station_count", cmap="viridis", legend=True, figsize=(12,8))

print(counties_ev.crs)
counties_ev.head()


In [None]:
import folium
from folium.features import GeoJson, GeoJsonTooltip
import geopandas as gpd
import numpy as np


def _invisible_style(feature):
    """Style for the tooltip overlay: no fill and a transparent outline."""
    return {"fillOpacity": 0, "color": "transparent"}


# Work on a copy with simplified geometries so the map renders faster.
counties_simple = counties_ev.copy()
counties_simple["geometry"] = counties_simple["geometry"].simplify(0.01, preserve_topology=True)

# Shade by log10(count + 1); the +1 keeps counties with zero stations defined.
counties_simple["log_ev_station_count"] = np.log10(1 + counties_simple["ev_station_count"])

# Base map.
m = folium.Map(location=[37.8, -96], zoom_start=4, tiles="CartoDB positron")

# Choropleth layer keyed on GEOID, coloured by the log-scaled count.
choropleth = folium.Choropleth(
    geo_data=counties_simple,
    data=counties_simple,
    columns=["GEOID", "log_ev_station_count"],
    key_on="feature.properties.GEOID",
    fill_color="YlOrRd",
    fill_opacity=0.8,
    line_opacity=0,  # hide the county border lines
    legend_name="Log10 of EV Stations per County",
)
choropleth.add_to(m)

# Invisible overlay that only provides hover tooltips (county name + raw count).
county_tooltip = GeoJsonTooltip(
    fields=["NAME", "ev_station_count"],
    aliases=["County:", "EV Stations:"],
    localize=True,
)
GeoJson(
    counties_simple,
    style_function=_invisible_style,
    tooltip=county_tooltip,
).add_to(m)

# Display the interactive map.
m


In [None]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np

# Coverage map: one translucent fixed-radius circle per EV station, drawn over
# faint county outlines.

# base map
coverage_solid_map = folium.Map(
    location=[37.8, -96],
    zoom_start=4,
    tiles="CartoDB positron"
)

# faint county outlines
folium.GeoJson(
    counties_ev,
    style_function=lambda x: {
        "fillOpacity": 0,
        "color": "#555555",
        "weight": 0.2
    }
).add_to(coverage_solid_map)

# circle parameters
METERS_PER_MILE = 1609.344  # exact international mile
radius_miles = 2
radius_meters = radius_miles * METERS_PER_MILE
fill_opacity = 0.08

# `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` works
# on both old and new releases.
colormap = plt.get_cmap("plasma")  # or try "inferno", "magma", "viridis"

# 10 evenly spaced colors sampled across the colormap
colors_list = [colors.rgb2hex(colormap(i)) for i in np.linspace(0, 1, 10)]

# folium rejects NaN coordinates, so skip stations without a position.
ev_coords = ev_gdf[["Latitude", "Longitude"]].dropna()

# Colors are assigned by cycling through the palette in row order. This is
# deterministic, not random; overlapping translucent circles blend visually.
for i, (lat, lon) in enumerate(ev_coords.itertuples(index=False, name=None)):
    fill_color = colors_list[i % len(colors_list)]
    folium.Circle(
        location=[lat, lon],
        radius=radius_meters,
        color=None,
        fill=True,
        fill_color=fill_color,
        fill_opacity=fill_opacity
    ).add_to(coverage_solid_map)

coverage_solid_map
