In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

  from pandas.core.computation.check import NUMEXPR_INSTALLED


Load the dataset

In [2]:
df = pd.read_csv('Electric_Vehicle_Population_Data_20260126.csv')

In [3]:
df['Model Year'] = pd.to_numeric(df['Model Year'], errors='coerce')
df['Electric Range'] = pd.to_numeric(df['Electric Range'], errors='coerce')

In [4]:
df_wa = df[df['State'] == 'WA'].copy()
coords = df_wa['Vehicle Location'].str.extract(r'POINT \((?P<Lon>.*) (?P<Lat>.*)\)')
df_wa['Lat'] = pd.to_numeric(coords['Lat'], errors='coerce')
df_wa['Lon'] = pd.to_numeric(coords['Lon'], errors='coerce')

print(f"Data Prepared. Analysis for {len(df_wa)} records starting...")

Data Prepared. Analysis for 270454 records starting...


ANALYTICAL QUESTIONS

Q1. How has the total number of EVs registered changed year-over-year?

In [5]:
q1 = df_wa.groupby('Model Year').size().reset_index(name='Count')
px.line(q1, x='Model Year', y='Count', markers=True, 
        title="Q1: Yearly EV Registration Growth in WA State").show()

Q2. Which manufacturers (Makes) own the largest share of the EV market?

In [6]:
q2 = df_wa['Make'].value_counts().head(10).reset_index()
q2.columns = ['Manufacturer', 'Count']
px.bar(q2, x='Manufacturer', y='Count', color='Count', 
       title="Q2: Top 10 Manufacturers by Market Share").show()

Q3. How has the average Electric Range evolved by Model Year?

In [7]:
q3_data = df_wa[df_wa['Electric Range'] > 0].groupby('Model Year')['Electric Range'].mean().reset_index()
px.line(q3_data, x='Model Year', y='Electric Range', title="Q3: Average Electric Range Evolution").show()

Q4. Which counties have the highest density of EV registrations?

In [8]:
q4_data = df_wa['County'].value_counts().head(10).reset_index()
q4_data.columns = ['County', 'Count']
px.bar(q4_data, x='County', y='Count', title="Q4: Top 10 Counties by EV Population").show()

Q5. Which manufacturers offer the highest average electric range? (Top 10)

In [9]:
range_by_make = df_wa[df_wa['Electric Range'] > 0].groupby('Make')['Electric Range'].mean().reset_index()
range_by_make = range_by_make.sort_values('Electric Range', ascending=False).head(10)

fig5 = px.bar(range_by_make, x='Make', y='Electric Range', color='Electric Range',
             title="Q5: Top 10 Manufacturers by Average Electric Range",
             labels={'Electric Range': 'Average Range (miles)'},
             template="plotly_white")
fig5.show()

Q6. What percentage of the current fleet is eligible for Clean Alternative Fuel Vehicle (CAFV) benefits?

In [10]:
q6_data = df_wa['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts().reset_index()
q6_data.columns = ['Eligibility', 'Count']
px.pie(q6_data, values='Count', names='Eligibility', title="Q6: CAFV Eligibility Distribution").show()

Q7. What are the top 10 specific EV models currently on the road?

In [11]:
q7_data = df_wa.groupby(['Make', 'Model']).size().reset_index(name='Count').sort_values('Count', ascending=False).head(10)
px.bar(q7_data, x='Model', y='Count', color='Make', title="Q7: Top 10 Most Popular EV Models").show()

Q8. How are EVs distributed across different Legislative Districts?

In [12]:
q8_data = df_wa['Legislative District'].value_counts().reset_index()
q8_data.columns = ['District', 'Count']
px.bar(q8_data.sort_values('District'), x='District', y='Count', 
       title="Q8: Distribution by Legislative District").show()

Q9. Which electric utilities are managing the highest number of EVs?

In [13]:
top_5 = df_wa['Make'].value_counts().head(5).index
q9 = df_wa[(df_wa['Make'].isin(top_5)) & (df_wa['Electric Range'] > 0)]
px.violin(q9, x='Make', y='Electric Range', box=True, color='Make', 
          title="Q9: Electric Range Distribution for Top 5 Manufacturers").show()

Q10. Do certain manufacturers focus more on long-range BEVs vs. short-range PHEVs?

In [14]:
px.strip(q9, x='Make', y='Model Year', color='Make', 
         title="Q10: Distribution of Model Years across Leading Brands").show()

Q11. Is there a correlation between census tracts and EV adoption rates?

In [15]:

q11 = df_wa['County'].value_counts().head(10).reset_index()
q11.columns = ['County', 'Count']
px.bar(q11, x='County', y='Count', title="Q11: Top 10 EV Counties in WA").show()

Q12. What are the top 15 cities driving EV adoption?

In [16]:

q12 = df_wa['City'].value_counts().head(15).reset_index()
q12.columns = ['City', 'Count']
px.bar(q12, x='Count', y='City', orientation='h', title="Q12: Top 15 Cities for EV Adoption").show()

Q13. How is the EV population spread across Legislative Districts?

In [17]:
q13_data = df_wa['Legislative District'].value_counts().reset_index()
q13_data.columns = ['District', 'Count']

q13_data = q13_data.sort_values('District')

fig13 = px.area(q13_data, x='District', y='Count', 
               title="Q13: EV Registration Density by WA Legislative District",
               labels={'District': 'Legislative District Number', 'Count': 'Total EVs'},
               template="plotly_white")
fig13.show()

Q14. How many new models have entered the market in the last 2 years compared to a decade ago?

In [18]:
q14_data = df_wa.groupby('Model Year')['Model'].nunique().reset_index()
px.bar(q14_data, x='Model Year', y='Model', title="Q14: Market Diversity (Unique Models per Year)").show()

Q15. How does the average electric range vary by both Manufacturer and Vehicle Type?

In [19]:
top_10_makes = df_wa['Make'].value_counts().head(10).index
multi_df = df_wa[(df_wa['Make'].isin(top_10_makes)) & (df_wa['Electric Range'] > 0)]

fig_multi = px.box(multi_df, x='Make', y='Electric Range', color='Electric Vehicle Type',
                  title="Q15:Advanced Analysis: Range Distribution by Manufacturer & Tech Type",
                  labels={'Electric Range': 'Range (miles)'},
                  template="plotly_white")
fig_multi.update_layout(boxmode='group') 
fig_multi.show()

Q16. Can we visualize the exact "clusters" of EVs using latitude/longitude data?

In [20]:

map_df = df_wa.dropna(subset=['Lat', 'Lon']).sample(min(len(df_wa), 50000))
fig16 = px.density_mapbox(map_df, lat='Lat', lon='Lon', radius=8,
                          center=dict(lat=47.3, lon=-120.5), zoom=5,
                          mapbox_style="open-street-map", 
                          title="Q16: Geospatial Density of EVs in Washington State")
fig16.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig16.show()


*density_mapbox* is deprecated! Use *density_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/

