In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
import contextily as ctx

In [None]:
# Load the Electric Vehicle Population CSV into `df`, the frame the analysis
# cells below read from.
df = pd.read_csv('data/Electric_Vehicle_Population_Data.csv')

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted an extra "None" line.
df.info()

In [None]:
# Histogram of the non-missing 'Electric Range' values, split into 50 bins.
fig, ax = plt.subplots(figsize=(10, 6))

electric_range = df['Electric Range'].dropna()
sns.histplot(electric_range, bins=50, ax=ax)

ax.set_title("Distribution of Electric Range")
ax.set_xlabel("Electric Range (miles)")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Ten most frequent values of 'City', i.e. the cities with the most rows.
# The `df` loaded earlier in the notebook is reused here; re-reading the same
# CSV in this cell was redundant work.
car_counts_Cty = df['City'].value_counts().nlargest(10)

car_counts_Cty.plot(kind='bar')
plt.xlabel('City')
plt.ylabel('Number of Cars')
plt.title('Top 10 Count of Cars per City')
plt.show()

# Same counts as a one-column table. The Styler is left as the cell's last
# expression so the notebook renders it with a blue background gradient.
car_counts_cty_df = car_counts_Cty.to_frame()
car_counts_cty_df.style.background_gradient(cmap='Blues')


In [None]:
# Value counts of 'City' over only the first 20 rows of df, ordered by city name.
# NOTE(review): City_df is not read by any other visible cell — confirm whether
# it is still needed.
City_df = pd.DataFrame(df.City[:20].value_counts().sort_index())

# Number of rows per city across the whole frame, most frequent first.
# NOTE(review): despite its name, this series does not involve 'Electric Range'.
cities_by_Electric_Range = df.City.value_counts()

# Pie chart of the 20 most frequent cities. The title makes the figure
# self-explanatory, the default y-label (the series name) is cleared, and
# plt.show() keeps the Axes repr out of the cell output.
cities_by_Electric_Range.iloc[:20].plot(kind='pie')
plt.title('Share of Vehicles Among the 20 Most Common Cities')
plt.ylabel('')
plt.show()

In [None]:
# Scatter of 'Base MSRP' against 'Electric Range', one point per row of df.
# A title is set so the figure stands alone, and plt.show() ends the cell so
# the Axes repr is not echoed as cell output.
sns.scatterplot(data=df, x="Electric Range", y="Base MSRP")
plt.title("Base MSRP vs. Electric Range")
plt.show()

In [None]:
# Scatter of 'Model Year' against 'Electric Range', one point per row of df.
# A title is set so the figure stands alone, and plt.show() ends the cell so
# the Axes repr is not echoed as cell output.
sns.scatterplot(data=df, x='Electric Range', y='Model Year')
plt.title("Model Year vs. Electric Range")
plt.show()

In [None]:
# Count rows per (Make, Model) pair and select the pair with the highest count.
car_counts = (
    df.groupby(['Make', 'Model'])
    .size()
    .reset_index(name='Count')
)
most_popular_car = car_counts.loc[car_counts['Count'].idxmax()]
popular_car_make = most_popular_car['Make']
popular_car_model = most_popular_car['Model']

# Keep only the rows for that make/model, then tally them per city.
is_popular_car = (df['Make'] == popular_car_make) & (df['Model'] == popular_car_model)
popular_car_registrations = df[is_popular_car]
city_counts = popular_car_registrations['City'].value_counts().reset_index(name='Count')
city_counts.columns = ['City', 'Count']
top_cities = city_counts.head(10)

# Bar chart of the ten cities with the most rows for that model.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_cities, x='City', y='Count', palette='viridis', ax=ax)
plt.xticks(rotation=45)
ax.set_xlabel('City')
ax.set_ylabel('Number of Registrations')
ax.set_title(f'Top 10 Cities with Most Registrations of {popular_car_make} {popular_car_model} (Most Popular Car)')
plt.tight_layout()
plt.show()

In [None]:
# Keep only rows whose 'Electric Range' is greater than 0.
non_zero_range = df[df['Electric Range'] > 0]

# Stacked histogram (30 bins) of electric range, coloured by 'Electric Vehicle Type'.
sns.histplot(data=non_zero_range, x='Electric Range', hue='Electric Vehicle Type', bins=30, multiple='stack')
# The title previously lacked its closing parenthesis.
plt.title("Electric Range Distribution by Vehicle Type (Excluding Vehicles with zero mile electric range)")
plt.xlabel("Electric Range (miles)")
plt.ylabel("Frequency")
plt.show()



In [None]:
# Box plot of 'Electric Range' per 'Make', using only rows with a range above 0.
has_positive_range = df['Electric Range'] > 0
df_filtered = df[has_positive_range]

fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(x='Make', y='Electric Range', data=df_filtered, ax=ax)

ax.set_title("Electric Range Distribution by Make (Excluding Zero Range)")
ax.set_xlabel("Make")
ax.set_ylabel("Electric Range (miles)")
plt.xticks(rotation=90)  # vertical tick labels for the make names
plt.show()

In [None]:
# Box plot of 'Electric Range' per 'Model Year', limited to model years 2010 and later.
is_2010_or_later = df['Model Year'] >= 2010
df_filtered = df[is_2010_or_later]

fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(x='Model Year', y='Electric Range', data=df_filtered, ax=ax)

ax.set_title("Electric Range Distribution by Model Year (2010 and Above)")
ax.set_xlabel("Model Year")
ax.set_ylabel("Electric Range (miles)")
plt.xticks(rotation=90)  # vertical tick labels for the years
plt.show()


In [None]:
# Stacked histogram (25 bins) of 'Electric Range', coloured by the CAFV
# eligibility column; linewidth=0 removes the outlines between bars.
fig, ax = plt.subplots(figsize=(12, 6))

sns.histplot(
    data=df,
    x="Electric Range",
    hue="Clean Alternative Fuel Vehicle (CAFV) Eligibility",
    multiple="stack",
    linewidth=0,
    bins=25,
    ax=ax,
)

ax.set_title('Electric Range Distribution by CAFV Eligibility')
ax.set_xlabel('Electric Range')
ax.set_ylabel('Count')

plt.show()

In [None]:
# Rows per city as a two-column frame ('City', 'Count'), most frequent first.
city_counts = df['City'].value_counts().reset_index()
city_counts.columns = ['City', 'Count']

# Bar chart restricted to the 20 most frequent cities.
top_20_cities = city_counts.head(20)
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x='City', y='Count', data=top_20_cities, palette='viridis', ax=ax)
ax.set_title('Number of Electric Vehicles by City')
ax.set_xlabel('City')
ax.set_ylabel('Number of EVs')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Rows per county as a two-column frame ('County', 'Count'), most frequent first.
county_counts = df['County'].value_counts().reset_index()
county_counts.columns = ['County', 'Count']

# Bar chart restricted to the 20 most frequent counties.
top_20_counties = county_counts.head(20)
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x='County', y='Count', data=top_20_counties, palette='magma', ax=ax)
ax.set_title('Number of Electric Vehicles by County')
ax.set_xlabel('County')
ax.set_ylabel('Number of EVs')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Lookup from two-letter postal abbreviation to full name for the 50 US states
# plus the District of Columbia (51 entries).
state_abbr_to_name = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
    "DC": "District of Columbia",
}

In [None]:
# Count rows per state abbreviation, using the `df` already loaded earlier in
# the notebook (re-reading the CSV here was redundant).
state_counts = df['State'].value_counts().reset_index()
state_counts.columns = ['State', 'Vehicle Count']

# Replace abbreviations with full state names. Abbreviations missing from
# state_abbr_to_name map to NaN and are dropped. A new frame is built instead
# of mutating in place so re-running the cell always gives the same result.
vehicle_counts = (
    state_counts
    .assign(State=state_counts['State'].map(state_abbr_to_name))
    .dropna(subset=['State'])
)

# Last expression of the cell: rendered as a table by the notebook.
vehicle_counts

In [None]:
# State boundary shapefile (cb_2018_us_state_500k). It is referenced relative
# to the notebook, next to the CSV under data/, instead of through a
# user-specific absolute path, so the cell can run on other machines.
# The .shp and its sidecar files must be placed in that directory.
shape_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
shape = gpd.read_file(shape_path)

# Attach the per-state vehicle counts by matching the shapefile's NAME column
# against the full state names in vehicle_counts.
shape = shape.merge(
    vehicle_counts,
    left_on='NAME',
    right_on='State',
    how='left',
)

# Drop only the states that have no vehicle count; a blanket dropna() would
# also discard states over a null in any unrelated attribute column.
shape = shape.dropna(subset=['Vehicle Count'])
# Exclude these three names from the plotted map.
shape = shape[~shape['NAME'].isin(['Alaska', 'Hawaii', 'Puerto Rico'])]

# Thin black state outlines, then a choropleth of the counts on the same axes.
ax = shape.boundary.plot(edgecolor='black', linewidth=0.2, figsize=(10, 5))
shape.plot(
    ax=ax,
    column='Vehicle Count',
    legend=True,
    cmap='RdBu',
    legend_kwds={'shrink': 0.3, 'orientation': 'horizontal'},
)

# Hide ticks, labels and all four spines; the previous loop skipped 'left',
# which left a stray vertical line beside the map.
ax.set_axis_off()

ax.set_title("EVs Distribution in the USA", size=18, weight='bold')
plt.show()