In [None]:

# EV Charging Demand Forecasting – Internship Final Project (Week 3)

In [None]:
## 📌 Objective
To forecast EV charging demand (in kWh) based on features like:
- Hour of the day
- Day of the week
- Whether it's a holiday
- Ambient temperature

In [None]:
---

In [None]:
## 📁 Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:

## 🧪 Step 2: Create and Visualize Synthetic Dataset

In [None]:
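We simulate a synthetic dataset of 500 charging sessions. In the simulation, demand (in kWh) is driven by:
- Hourly usage patterns (a sinusoidal 24-hour cycle)
- Holidays (a fixed uplift on holiday sessions)
- Random noise, plus a handful of injected outliers

Temperature variation and day of the week are also generated as input features, but the simulated demand formula below does not depend on them.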

In [None]:
# Seeded generator: every draw below is reproducible, and the order of the
# draws determines the values, so the statements must stay in this sequence.
rng = np.random.default_rng(42)
n = 500

# Feature columns, one row per simulated charging session.
data = pd.DataFrame(
    {
        "hour": rng.integers(0, 24, size=n),
        "dayofweek": rng.integers(0, 7, size=n),
        "is_holiday": rng.integers(0, 2, size=n),
        "temperature": rng.normal(30, 4, size=n).round(1),
    }
)

# Target: a base of 8 plus a 24-hour sinusoidal cycle (amplitude 4), a +2
# uplift when is_holiday is 1, and Gaussian noise (std 1.5), rounded to two
# decimals. Neither temperature nor dayofweek enters the formula.
data = data.assign(
    demand_kWh=lambda frame: (
        8 + 4 * np.sin(2 * np.pi * frame["hour"] / 24)
        + 2 * frame["is_holiday"]
        + rng.normal(0, 1.5, size=n)
    ).round(2)
)

# Scale 10 distinct, randomly chosen rows by 3.5x so they act as outliers.
outliers = rng.choice(data.index, size=10, replace=False)
data.loc[outliers, "demand_kWh"] = data.loc[outliers, "demand_kWh"] * 3.5

In [None]:

## 📊 Step 3: Exploratory Data Analysis

In [None]:
# Distribution of the demand target, including the rows that were scaled up
# as outliers when the dataset was built.
# The series is passed explicitly as `x`: a bare positional argument is read
# as `x` by older seaborn releases but as `data` by newer ones, which changes
# how the box is oriented. The keyword keeps the plot the same on both.
sns.boxplot(x=data['demand_kWh'])
plt.title("EV Charging Demand (with Outliers)")
plt.xlabel("Demand (kWh)")  # state the unit so the figure stands alone
plt.show()

In [None]:
# Pairwise correlations between all columns of the dataset, drawn as an
# annotated heatmap so the coefficient is readable in each cell.
correlations = data.corr()
sns.heatmap(correlations, cmap="YlGnBu", annot=True)
plt.title("Feature Correlation")
plt.show()

In [None]:

## 🧠 Step 4: Model Training – Random Forest Regressor

In [None]:
# Model inputs are the four generated feature columns; the target is demand.
feature_columns = ['hour', 'dayofweek', 'is_holiday', 'temperature']
X = data[feature_columns]
y = data['demand_kWh']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a random forest with default hyperparameters (seeded for repeatable
# results) on the training rows, then predict demand for the held-out rows.
rf = RandomForestRegressor(random_state=42)
rf.fit(X=X_train, y=y_train)
y_pred = rf.predict(X=X_test)

In [None]:
# Score the held-out predictions: mean squared error and the coefficient of
# determination (R²).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Report both metrics, rounded for readability (2 decimals for MSE, 3 for R²).
mse_rounded = round(mse, 2)
r2_rounded = round(r2, 3)
print("Mean Squared Error (MSE):", mse_rounded)
print("R² Score:", r2_rounded)

In [None]:

## 📈 Step 5: Predictions Visualization

In [None]:
# Overlay the first 30 held-out targets and their predictions on one axis so
# the fit can be judged by eye; predictions are dashed to tell them apart.
plt.figure(figsize=(8, 5))
plt.plot(y_test.values[:30], label='Actual')
plt.plot(y_pred[:30], linestyle='--', label='Predicted')
plt.xlabel("Sample")
plt.ylabel("Demand (kWh)")
plt.title("Actual vs Predicted EV Demand (Sample 30)")
plt.legend()
plt.show()

In [None]:

## ✅ Conclusion

In [None]:
- The Random Forest model provides a baseline with R² ≈ 0.28, i.e. it explains roughly 28% of the variance in demand on the held-out test set.
- This baseline can be improved by:
  - Including real-time traffic or weather data
  - Modeling different locations separately
  - Trying other models, such as XGBoost, or an LSTM for time-series prediction
