In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


In [None]:
# Read the station CSV into the notebook-wide `data` frame, then preview
# its first row as the cell output.
data = pd.read_csv(filepath_or_buffer='EV_Stations_(65 C).csv')
data.head(n=1)

In [None]:
# Print a heading, the frame's (rows, columns) shape, and the per-column
# dtype / non-null report.
print('}---------------DataFrame Overview--------------->>')
print(f'------------------(Rows,Columns){data.shape}-----------------------')
# DataFrame.info() writes its report to stdout and returns None, so it must be
# called as a statement; interpolating it into the f-string printed "None".
data.info()

In [None]:
# Heading followed by describe() output for the three charger-count columns.
print('}-----------Summary Statistics for EVSE Levels-------------->')
data.loc[:, ['EV Level1 EVSE Num', 'EV Level2 EVSE Num', 'EV DC Fast Count']].describe()


In [None]:
# Number of rows whose 'ID' repeats an earlier row's 'ID'
# (the first occurrence of each value is not counted).
print('Count of Duplicate IDs in the Dataset')
data.duplicated(subset=['ID']).sum()

In [None]:
# Show the frame's column labels; for a DataFrame, keys() is the column Index.
print('---- List of Column Names in the Dataset ----')
data.keys()

In [None]:
# Build a two-column table: each column label and how many nulls it contains.
print('---- Count of Missing Values in Each Column ----')
missing_values_df = (
    data.isna()
    .sum()
    .rename_axis('Column Name')
    .reset_index(name='Count')
)
missing_values_df

In [None]:
# Keep only rows whose 'Fuel Type Code' is 'ELEC' (electric stations)
data = data[data['Fuel Type Code'] == 'ELEC']

# Drop columns with more than 95% missing data, i.e. keep a column only when
# at least 5% of its rows are non-null. `thresh` is documented as an integer
# count of required non-null values, so round the 5% cut-off up to a whole
# number (for integer counts, count >= x is the same as count >= ceil(x)).
threshold = int(np.ceil(len(data) * 0.05))
# .copy() detaches the result from the original frame so the column
# assignments below (and in later cells) do not trigger SettingWithCopyWarning.
data = data.dropna(thresh=threshold, axis=1).copy()

# Convert dates to datetime; values that cannot be parsed become NaT
data['Open Date'] = pd.to_datetime(data['Open Date'], errors='coerce')
data['Updated At'] = pd.to_datetime(data['Updated At'], errors='coerce')


In [None]:
# Fill missing values column by column. Every fill is assigned back to the
# frame: `data[col].fillna(..., inplace=True)` operates on a temporary Series
# (deprecated chained assignment, a no-op under Copy-on-Write) and returns
# None, which previously overwrote 'Federal Agency ID' with None.
print('------ Filling Missing Values in Selected Columns -----')

# Text columns: missing entries become the literal label 'Unknown'.
for text_col in ('EV Pricing', 'EV Network'):
    data[text_col] = data[text_col].fillna('Unknown')

# Charger-count columns: missing entries become the column mean.
for count_col in ('EV Level1 EVSE Num', 'EV Level2 EVSE Num', 'EV DC Fast Count'):
    data[count_col] = data[count_col].fillna(data[count_col].mean())

# Rows without an agency ID get the placeholder '1' and the name 'Unknown'.
# The guard skips this step if the column is absent (e.g. removed earlier as
# a mostly-empty column) instead of raising KeyError.
# NOTE(review): '1' is used as a sentinel; confirm no real agency uses ID '1'.
if 'Federal Agency ID' in data.columns:
    data['Federal Agency ID'] = data['Federal Agency ID'].fillna('1')
    data.loc[data['Federal Agency ID'] == '1', 'Federal Agency Name'] = 'Unknown'

In [None]:
# Bar chart of the ten most frequent values in the 'State' column.
data['State'].value_counts().iloc[:10].plot.bar(title='Top 10 States by Number of EV Stations')

In [None]:
# Restrict to rows with State == 'CA', then tabulate the ten most frequent cities.
state_CA = data.loc[data['State'] == 'CA']
CA_City = (
    state_CA['City']
    .value_counts()
    .rename_axis('City')
    .reset_index(name='Count')
    .head(10)
)

# One bar per city; labels are rotated so they do not overlap.
sns.barplot(data=CA_City, x='City', y='Count', palette='pastel')
plt.gca().set_title('Top 10 Cities with Most EV Stations in California')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Number of stations per opening year, in chronological order.
# `.dt.year` yields floats when any 'Open Date' is NaT, which makes the tick
# labels read like "2015.0"; drop the missing dates and cast to int first.
open_years = data['Open Date'].dt.year.dropna().astype(int)
year_ax = open_years.value_counts().sort_index().plot(
    kind='bar', figsize=(10, 5), title='EV Stations Opened by Year'
)
year_ax.set_xlabel('Open Year')
year_ax.set_ylabel('Number of Stations')
plt.show()

In [None]:
# Bar chart of the ten most frequent 'Station Name' values.
# sns.displot is a figure-level function: it opens its own figure (leaving the
# sized figure created here empty) and, given both x and y, draws a bivariate
# histogram. An axes-level barplot on an explicit Axes draws the intended bars.
fig, ax = plt.subplots(figsize=(10, 8))
station_df = data['Station Name'].value_counts().reset_index().head(10)
station_df.columns = ['Station Name', 'Count']
sns.barplot(data=station_df, x='Station Name', y='Count', ax=ax)
ax.set_title("Top 10 Most Common EV Stations")
# Rotate and right-align the labels so long station names stay readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()

In [None]:
# Horizontal bar chart of the ten most frequent 'EV Network' values.
Network = data['EV Network'].value_counts().iloc[:10]
plt.figure(figsize=(8, 6))
net_axes = plt.gca()
net_axes.barh(y=Network.index, width=Network.values, color='darkblue')
# Flip the y axis so the first (most frequent) entry is drawn at the top.
net_axes.invert_yaxis()
net_axes.set_title("Top 10 EV Network")
plt.show()

In [None]:
# Bar chart of how often each 'Access Code' value occurs.
fig, ax = plt.subplots()
Access_df = (
    data['Access Code']
    .value_counts()
    .rename_axis('Sector')
    .reset_index(name='Count')
)
ax.bar(x=Access_df['Sector'], height=Access_df['Count'], color='green')
ax.set_title("Access Code Distribution")
plt.show()

In [None]:
# Tabulate the ten most frequent 'Facility Type' values and draw them as bars.
Facility_df = (
    data['Facility Type']
    .value_counts()
    .rename_axis('Facility')
    .reset_index(name='Count')
    .iloc[:10]
)
sns.barplot(x='Facility', y='Count', data=Facility_df, palette='coolwarm')
plt.gca().set_title("Top 10 Facility Types")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Collapse 'EV Pricing' to three labels: every value other than 'Free' or
# 'Unknown' is overwritten with 'Chargeable'; the resulting counts are printed.
print("---- Updating EV Pricing: Marking Non-Free/Unknown as Chargeable ----")
needs_relabel = ~data['EV Pricing'].isin(['Free', 'Unknown'])
data.loc[needs_relabel, 'EV Pricing'] = 'Chargeable'

pricing_counts = data['EV Pricing'].value_counts()
print(pricing_counts)


In [None]:
# Pie chart of the share of each 'EV Pricing' label.
Pricing_df = (
    data['EV Pricing']
    .value_counts()
    .rename_axis('Pricing')
    .reset_index(name='Count')
)
colors = sns.color_palette('muted')[:4]

plt.figure(figsize=(8, 6))
# NOTE(review): label and title text are white, so they are presumably meant
# for a dark notebook background - confirm they remain visible in your theme.
label_style = {'color': 'white', 'fontsize': 12, 'weight': 'bold'}
plt.pie(
    x=Pricing_df['Count'],
    labels=Pricing_df['Pricing'],
    colors=colors,
    autopct='%1.1f%%',
    startangle=140,
    wedgeprops={'edgecolor': 'black'},
    textprops=label_style,
)
plt.title('Distribution of EV Pricing', color='white')
plt.show()

In [None]:
# Line plot (with point markers) of the ten most frequent 'Station Name'
# values among rows with State == 'CA'.
f, ax = plt.subplots(figsize=(20, 10))

state_CA = data.loc[data['State'] == 'CA']

station_CA = state_CA['Station Name'].value_counts().iloc[:10]
station_CA_df = station_CA.rename_axis('Station Name').reset_index(name='Count')

sns.lineplot(x="Station Name", y="Count", data=station_CA_df, marker='o', ax=ax)
ax.set_title('Station Name Count in CA')
ax.set_xlabel('Station Name', fontsize=18)
ax.set_ylabel('Count', fontsize=18)

# Tilt and right-align every x tick label so the station names do not collide.
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
    tick.set_ha('right')
    tick.set_fontsize(18)

f.tight_layout()
plt.show()


In [None]:
# Scatter plot for rows with State == 'CA': Level 1 count on x, Level 2 count
# on y, marker size from 'EV DC Fast Count', marker colour from 'Status Code'.

# Switch seaborn to the white-grid theme (applies to plots drawn from here on).
sns.set_theme(style="whitegrid")

fig, ax = plt.subplots(figsize=(12, 6))

state_CA = data.loc[data['State'] == 'CA']

scatter_options = dict(
    x='EV Level1 EVSE Num',
    y='EV Level2 EVSE Num',
    hue='Status Code',
    size='EV DC Fast Count',
    sizes=(50, 500),       # smallest and largest marker area
    alpha=0.8,
    palette="viridis",
)
scatter = sns.scatterplot(data=state_CA, ax=ax, **scatter_options)

# Title and axis labels
ax.set_title('EV Level1 vs EV Level2 in CA (Size represents EV DC Fast Count)', fontsize=14)
ax.set_xlabel('EV Level1 EVSE Num', fontsize=12)
ax.set_ylabel('EV Level2 EVSE Num', fontsize=12)

# Re-create the legend in the upper-right corner with an overall title.
# NOTE(review): this call appears to keep every labelled entry (hue and size),
# so the "Status Code" title may also sit above the size entries - confirm.
ax.legend(loc="upper right", title="Status Code", title_fontsize=12, fontsize=10)

plt.tight_layout()
plt.show()


In [None]:
import folium

# Interactive map with one small blue circle per station.
# NOTE(review): the start location is presumably a central point of the US - confirm.
map_ev = folium.Map(location=[37.0902, -95.7129], zoom_start=5)

# folium rejects NaN coordinates with a ValueError, so rows missing either
# coordinate are skipped. Iterating plain (lat, lon) tuples also avoids
# building a full Series per row as iterrows() does.
station_coords = data[['Latitude', 'Longitude']].dropna()
for lat, lon in station_coords.itertuples(index=False, name=None):
    folium.CircleMarker([lat, lon],
                        radius=3,
                        color='blue').add_to(map_ev)
map_ev


In [None]:
# from autoviz.AutoViz_Class import AutoViz_Class
# import os

# # Define the CSV file path
# file_path = "EV_Stations_(65 C).csv"

# # Check if the file exists before proceeding
# if not os.path.exists(file_path):
#     raise FileNotFoundError(f"File not found: {file_path}")

# # Create an instance of AutoViz_Class
# av = AutoViz_Class()

# report = av.AutoViz(
#     file_path,
#     sep=",",
#     depVar="",            # Specify target variable if applicable, or leave empty for exploratory analysis
#     dfte=None,            # DataFrame (if already loaded) can be passed here; otherwise, leave as None
#     header=0,
#     verbose=1,
#     lowess=False,         # Set to True if you want to apply lowess smoothing on scatter plots
#     chart_format="svg",   # Use "svg" for scalable graphics, or "png" for bitmap images
#     max_rows_analyzed=150000,
#     max_cols_analyzed=30
# )

# print("AutoViz report generated successfully!")
