# Predictive EV Charging Station Maintenance System

In [1]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
import joblib

In [2]:
# Loading the dataset
data = pd.read_csv('../data/charging_station_data.csv')
data.head()

Unnamed: 0,charging_station_id,location,time_of_day,next_maintenance_days,usage_efficiency,maintenance_needed,fault_probability,charging_sessions,total_energy_delivered_kW,last_maintenance_date,charging_duration_hours,temperature,voltage,current,user_feedback
0,1,Location C,Night,269.6,1.0,Software Update,0.06,6,639.842852,2024-10-23 04:54:31.926507,0.994945,17.014549,201.947638,78.460496,3
1,2,Location B,Morning,58.3,1.0,Connector Cleaning,0.03,3,477.459579,2024-03-26 09:02:31.926507,1.726527,25.368145,377.111947,95.831005,2
2,3,Location D,Evening,278.7,1.0,Connector Cleaning,0.07,7,213.178622,2024-11-01 17:17:31.926507,1.602714,19.881696,303.748184,61.473404,4
3,4,Location A,Night,347.8,1.0,Connector Cleaning,0.08,8,205.094803,2025-01-09 12:12:31.926507,0.978393,22.106901,353.341956,45.848011,1
4,5,Location B,Evening,134.5,1.0,Cable Inspection,0.05,5,808.333814,2024-06-10 10:23:31.926507,1.022105,31.62407,266.533409,77.774232,2


## Data Exploration

In [3]:
# Checking for missing values
data.isnull().sum()

charging_station_id          0
location                     0
time_of_day                  0
next_maintenance_days        0
usage_efficiency             0
maintenance_needed           0
fault_probability            0
charging_sessions            0
total_energy_delivered_kW    0
last_maintenance_date        0
charging_duration_hours      0
temperature                  0
voltage                      0
current                      0
user_feedback                0
dtype: int64

In [4]:
# Statistical summary
data.describe()

Unnamed: 0,charging_station_id,next_maintenance_days,usage_efficiency,fault_probability,charging_sessions,total_energy_delivered_kW,charging_duration_hours,temperature,voltage,current,user_feedback
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,2500.5,181.87264,1.0,0.049722,4.9722,511.449923,1.509177,27.516006,300.246584,55.179663,3.019
std,1443.520003,104.569033,0.0,0.022259,2.225852,286.128153,0.4902,7.276966,57.916741,25.741046,1.41783
min,1.0,1.0,1.0,0.0,0.0,10.048205,0.5,15.001944,200.052763,10.004734,1.0
25%,1250.75,92.7,1.0,0.03,3.0,269.316075,1.165694,21.330937,250.43428,33.48322,2.0
50%,2500.5,179.3,1.0,0.05,5.0,516.448157,1.51065,27.392233,300.35508,55.502263,3.0
75%,3750.25,271.3,1.0,0.06,6.0,763.35783,1.83683,33.818773,349.002787,77.315426,4.0
max,5000.0,365.0,1.0,0.15,15.0,999.746704,3.154994,39.995808,399.984634,99.980326,5.0


## Data Preprocessing

In [5]:
# Encoding categorical variables
label_enc = LabelEncoder()
data['charging_station_id'] = label_enc.fit_transform(data['charging_station_id'])
data['maintenance_needed'] = data['maintenance_needed'].astype('category').cat.codes

In [6]:
# Converting dates to numerical features
data['last_maintenance_date'] = pd.to_datetime(data['last_maintenance_date'])
data['last_maintenance_days'] = (pd.Timestamp.now() - data['last_maintenance_date']).dt.days
data.drop('last_maintenance_date', axis=1, inplace=True)

## Feature Selection

In [7]:
# Selecting features and target variables
features = ['charging_station_id', 'charging_sessions', 'total_energy_delivered_kW', 'last_maintenance_days', 'charging_duration_hours']
targets = ['next_maintenance_days', 'maintenance_needed', 'fault_probability']
X = data[features]
y = data[targets]

## Splitting the Dataset

In [8]:
# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Feature Scaling

In [9]:
# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Model Training

In [10]:
# Training regression model for 'next_maintenance_days'
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42)
rf_reg.fit(X_train_scaled, y_train['next_maintenance_days'])

In [11]:
# Training classification model for 'maintenance_needed'
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train_scaled, y_train['maintenance_needed'])

In [12]:
# Training regression model for 'fault_probability'
rf_fault = RandomForestRegressor(n_estimators=100, random_state=42)
rf_fault.fit(X_train_scaled, y_train['fault_probability'])

## Model Evaluation

In [13]:
# Predicting and evaluating 'next_maintenance_days'
y_pred_reg = rf_reg.predict(X_test_scaled)
mse = mean_squared_error(y_test['next_maintenance_days'], y_pred_reg)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 0.2867551819999932


In [14]:
# Predicting and evaluating 'maintenance_needed'
y_pred_clf = rf_clf.predict(X_test_scaled)
accuracy = accuracy_score(y_test['maintenance_needed'], y_pred_clf)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test['maintenance_needed'], y_pred_clf))

Accuracy: 0.277
              precision    recall  f1-score   support

           0       0.29      0.34      0.31       250
           1       0.26      0.28      0.27       236
           2       0.31      0.29      0.30       265
           3       0.23      0.18      0.21       249

    accuracy                           0.28      1000
   macro avg       0.27      0.28      0.27      1000
weighted avg       0.28      0.28      0.27      1000



In [15]:
# Predicting and evaluating 'fault_probability'
y_pred_fault = rf_fault.predict(X_test_scaled)
mse_fault = mean_squared_error(y_test['fault_probability'], y_pred_fault)
print(f'Mean Squared Error for Fault Probability: {mse_fault}')

Mean Squared Error for Fault Probability: 1.1517999999999571e-07


## Saving the Models

In [17]:
# Saving the trained models and scaler
joblib.dump(rf_reg, '../models/model_next_maintenance_days.pkl')
joblib.dump(rf_clf, '../models/model_maintenance_needed.pkl')
joblib.dump(rf_fault, '../models/model_fault_probability.pkl')
joblib.dump(scaler, '../models/scaler.pkl')

['../models/scaler.pkl']

In [18]:
# Function to generate realistic random input
def generate_random_input():
    charging_station_id = np.random.randint(0, data['charging_station_id'].max() + 1)
    charging_sessions = np.random.randint(1, 100)
    total_energy_delivered_kW = np.random.uniform(10, 500)
    last_maintenance_days = np.random.randint(0, 365)
    charging_duration_hours = np.random.uniform(0.5, 24)
    
    return np.array([[charging_station_id, charging_sessions, total_energy_delivered_kW, last_maintenance_days, charging_duration_hours]])

# Generate random input
random_input = generate_random_input()

# Scale the input
random_input_scaled = scaler.transform(random_input)

# Get predictions
next_maintenance_days_pred = rf_reg.predict(random_input_scaled)
maintenance_needed_pred = rf_clf.predict(random_input_scaled)
fault_probability_pred = rf_fault.predict(random_input_scaled)

print(f'Random Input: {random_input}')
print(f'Predicted Next Maintenance Days: {next_maintenance_days_pred[0]}')
print(f'Predicted Maintenance Needed: {maintenance_needed_pred[0]}')
print(f'Predicted Fault Probability: {fault_probability_pred[0]}')

Random Input: [[3747.           25.          144.59170865  305.           20.12797966]]
Predicted Next Maintenance Days: 61.154999999999994
Predicted Maintenance Needed: 3
Predicted Fault Probability: 0.14110000000000025


