# Factors Influencing EV Charging Efficiency: A Comprehensive Model Evaluation

In [30]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [31]:
df = pd.read_csv('Charging station data.csv')

In [32]:
df.head()

Unnamed: 0,Date,Time,EV Charging Demand (kW),Solar Energy Production (kW),Wind Energy Production (kW),Electricity Price ($/kWh),Grid Availability,Weather Conditions,Battery Storage (kWh),Charging Station Capacity (kW),EV Charging Efficiency (%),Number of EVs Charging,Peak Demand (kW),Renewable Energy Usage (%),Grid Stability Index,Carbon Emissions (kgCO2/kWh),Power Outages (hours),Energy Savings ($)
0,01-01-2021,00:00:00,0.112362,0.125388,0.009105,0.13731,Available,Partly Cloudy,16.532408,21.763422,97.326376,6,0.15168,25.039066,0.731147,0.274944,1.889209,4.562581
1,01-01-2021,01:00:00,0.285214,0.052697,0.107589,0.125105,Available,Sunny,39.10693,31.215028,88.546913,7,0.573433,55.649899,1.494387,0.481251,0.277371,0.215104
2,01-01-2021,02:00:00,0.219598,0.105035,0.043996,0.106661,Available,Cloudy,6.112691,46.489116,89.872971,4,0.978482,79.970783,1.109293,0.146079,0.642644,0.029969
3,01-01-2021,03:00:00,0.179598,0.073839,0.275727,0.072209,Available,Partly Cloudy,30.041088,49.675029,92.759074,5,0.208993,2.267048,0.847219,0.475255,0.54668,1.38495
4,01-01-2021,04:00:00,0.046806,0.068614,0.059824,0.09109,Available,Partly Cloudy,45.085422,21.166182,83.784529,9,0.805132,96.815797,1.452466,0.319261,1.93585,1.87217


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29905 entries, 0 to 29904
Data columns (total 18 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Date                            29905 non-null  object 
 1   Time                            29905 non-null  object 
 2   EV Charging Demand (kW)         29905 non-null  float64
 3   Solar Energy Production (kW)    29905 non-null  float64
 4   Wind Energy Production (kW)     29905 non-null  float64
 5   Electricity Price ($/kWh)       29905 non-null  float64
 6   Grid Availability               29905 non-null  object 
 7   Weather Conditions              29905 non-null  object 
 8   Battery Storage (kWh)           29905 non-null  float64
 9   Charging Station Capacity (kW)  29905 non-null  float64
 10  EV Charging Efficiency (%)      29905 non-null  float64
 11  Number of EVs Charging          29905 non-null  int64  
 12  Peak Demand (kW)                

In [33]:
df['Date'].max()


'31-12-2023'

In [34]:
df['Date'].min()

'01-01-2021'

# Calculations To find derived columns from the data set.

In [35]:


# Total Renewable Energy Production (kW):
# ======================================= 
# The sum of solar and wind energy production.
# This represents the total amount of renewable energy generated.
df['Total Renewable Energy Production (kW)'] = df['Solar Energy Production (kW)'] + df['Wind Energy Production (kW)']

# Effective Charging Capacity (kW):
# ==================================
# The product of charging station capacity and EV charging efficiency.
# This calculates the actual capacity available for charging EVs, factoring in the efficiency of the charging process.
df['Effective Charging Capacity (kW)'] = (df['Charging Station Capacity (kW)'] * df['EV Charging Efficiency (%)']) / 100

# Adjusted Charging Demand (kW): 
# ===============================
# The EV charging demand adjusted by renewable energy usage.
# This reflects the portion of the charging demand that is met by renewable energy sources.
df['Adjusted Charging Demand (kW)'] = (df['EV Charging Demand (kW)'] * df['Renewable Energy Usage (%)']) / 100

# Net Energy Cost ($): 
# ===================== 
# The product of EV charging demand and electricity price.
# This gives the total cost associated with charging the EVs based on the demand and the current electricity prices.
df['Net Energy Cost ($)'] = df['EV Charging Demand (kW)'] * df['Electricity Price ($/kWh)']

# Carbon Footprint Reduction (kgCO2): 
# =====================================
# The reduction in carbon emissions due to renewable energy usage.
# This metric quantifies how much CO2 emissions are avoided by utilizing renewable energy for EV charging.
df['Carbon Footprint Reduction (kgCO2)'] = (
    df['EV Charging Demand (kW)'] * df['Carbon Emissions (kgCO2/kWh)'] * (1 - df['Renewable Energy Usage (%)'] / 100)
)

# Renewable Energy Efficiency (%): 
# ================================= 
# The efficiency of utilizing renewable energy for charging electric vehicles.
# This metric shows the proportion of renewable energy produced relative to the effective capacity available for charging.
df['Renewable Energy Efficiency (%)'] = (df['Total Renewable Energy Production (kW)'] / df['Effective Charging Capacity (kW)']) * 100

# Setting display options to show all columns and rows in the DataFrame
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.max_rows', None)     # Display all rows


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29905 entries, 0 to 29904
Data columns (total 24 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Date                                    29905 non-null  object 
 1   Time                                    29905 non-null  object 
 2   EV Charging Demand (kW)                 29905 non-null  float64
 3   Solar Energy Production (kW)            29905 non-null  float64
 4   Wind Energy Production (kW)             29905 non-null  float64
 5   Electricity Price ($/kWh)               29905 non-null  float64
 6   Grid Availability                       29905 non-null  object 
 7   Weather Conditions                      29905 non-null  object 
 8   Battery Storage (kWh)                   29905 non-null  float64
 9   Charging Station Capacity (kW)          29905 non-null  float64
 10  EV Charging Efficiency (%)              29905 non-null  fl

# Data Preprocessing

In [37]:
df.head()

Unnamed: 0,Date,Time,EV Charging Demand (kW),Solar Energy Production (kW),Wind Energy Production (kW),Electricity Price ($/kWh),Grid Availability,Weather Conditions,Battery Storage (kWh),Charging Station Capacity (kW),EV Charging Efficiency (%),Number of EVs Charging,Peak Demand (kW),Renewable Energy Usage (%),Grid Stability Index,Carbon Emissions (kgCO2/kWh),Power Outages (hours),Energy Savings ($),Total Renewable Energy Production (kW),Effective Charging Capacity (kW),Adjusted Charging Demand (kW),Net Energy Cost ($),Carbon Footprint Reduction (kgCO2),Renewable Energy Efficiency (%)
0,01-01-2021,00:00:00,0.112362,0.125388,0.009105,0.13731,Available,Partly Cloudy,16.532408,21.763422,97.326376,6,0.15168,25.039066,0.731147,0.274944,1.889209,4.562581,0.134493,21.18155,0.028134,0.015428,0.023158,0.634955
1,01-01-2021,01:00:00,0.285214,0.052697,0.107589,0.125105,Available,Sunny,39.10693,31.215028,88.546913,7,0.573433,55.649899,1.494387,0.481251,0.277371,0.215104,0.160286,27.639943,0.158721,0.035682,0.060875,0.579907
2,01-01-2021,02:00:00,0.219598,0.105035,0.043996,0.106661,Available,Cloudy,6.112691,46.489116,89.872971,4,0.978482,79.970783,1.109293,0.146079,0.642644,0.029969,0.149031,41.78115,0.175614,0.023423,0.006425,0.356695
3,01-01-2021,03:00:00,0.179598,0.073839,0.275727,0.072209,Available,Partly Cloudy,30.041088,49.675029,92.759074,5,0.208993,2.267048,0.847219,0.475255,0.54668,1.38495,0.349567,46.078097,0.004072,0.012969,0.08342,0.75864
4,01-01-2021,04:00:00,0.046806,0.068614,0.059824,0.09109,Available,Partly Cloudy,45.085422,21.166182,83.784529,9,0.805132,96.815797,1.452466,0.319261,1.93585,1.87217,0.128438,17.733986,0.045315,0.004264,0.000476,0.72425


In [38]:
objectcolumns =df.select_dtypes(include='object').columns
for columns in objectcolumns:
    unique_values = df[columns].unique()
    print(f"column name :{columns}")
    print(f"column values :{unique_values}\n")

column name :Date
column values :['01-01-2021' '02-01-2021' '03-01-2021' ... '29-05-2024' '30-05-2024'
 '31-05-2024']

column name :Time
column values :['00:00:00' '01:00:00' '02:00:00' '03:00:00' '04:00:00' '05:00:00'
 '06:00:00' '07:00:00' '08:00:00' '09:00:00' '10:00:00' '11:00:00'
 '12:00:00' '13:00:00' '14:00:00' '15:00:00' '16:00:00' '17:00:00'
 '18:00:00' '19:00:00' '20:00:00' '21:00:00' '22:00:00' '23:00:00']

column name :Grid Availability
column values :['Available' 'Unavailable']

column name :Weather Conditions
column values :['Partly Cloudy' 'Sunny' 'Cloudy' 'Clear' 'Rainy']



In [39]:
from sklearn.preprocessing import LabelEncoder
Label_encoder = LabelEncoder()
columns_to_encode =  ["Grid Availability", "Weather Conditions"]
for columns in columns_to_encode:
    if df[columns].isnull().any():
        df[columns] = df[columns].astype(str)
    df[columns]= Label_encoder.fit_transform(df[columns])


In [40]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29905 entries, 0 to 29904
Data columns (total 24 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Date                                    29905 non-null  object 
 1   Time                                    29905 non-null  object 
 2   EV Charging Demand (kW)                 29905 non-null  float64
 3   Solar Energy Production (kW)            29905 non-null  float64
 4   Wind Energy Production (kW)             29905 non-null  float64
 5   Electricity Price ($/kWh)               29905 non-null  float64
 6   Grid Availability                       29905 non-null  int32  
 7   Weather Conditions                      29905 non-null  int32  
 8   Battery Storage (kWh)                   29905 non-null  float64
 9   Charging Station Capacity (kW)          29905 non-null  float64
 10  EV Charging Efficiency (%)              29905 non-null  fl

In [41]:
objectcolumns = df.select_dtypes(include='int32').columns
for column in objectcolumns:
    if column in ["Grid Availability", "Weather Conditions"]:  
        unique_values = df[column].unique()
        print(f"Column Name: {column}")
        print(f"Column Values: {unique_values}\n")
#column name :Grid Availability
#column values :['Available' 'Unavailable']

#column name :Weather Conditions
#column values :['Partly Cloudy' 'Sunny' 'Cloudy' 'Clear' 'Rainy']

Column Name: Grid Availability
Column Values: [0 1]

Column Name: Weather Conditions
Column Values: [2 4 1 0 3]



In [42]:
df.head()

Unnamed: 0,Date,Time,EV Charging Demand (kW),Solar Energy Production (kW),Wind Energy Production (kW),Electricity Price ($/kWh),Grid Availability,Weather Conditions,Battery Storage (kWh),Charging Station Capacity (kW),EV Charging Efficiency (%),Number of EVs Charging,Peak Demand (kW),Renewable Energy Usage (%),Grid Stability Index,Carbon Emissions (kgCO2/kWh),Power Outages (hours),Energy Savings ($),Total Renewable Energy Production (kW),Effective Charging Capacity (kW),Adjusted Charging Demand (kW),Net Energy Cost ($),Carbon Footprint Reduction (kgCO2),Renewable Energy Efficiency (%)
0,01-01-2021,00:00:00,0.112362,0.125388,0.009105,0.13731,0,2,16.532408,21.763422,97.326376,6,0.15168,25.039066,0.731147,0.274944,1.889209,4.562581,0.134493,21.18155,0.028134,0.015428,0.023158,0.634955
1,01-01-2021,01:00:00,0.285214,0.052697,0.107589,0.125105,0,4,39.10693,31.215028,88.546913,7,0.573433,55.649899,1.494387,0.481251,0.277371,0.215104,0.160286,27.639943,0.158721,0.035682,0.060875,0.579907
2,01-01-2021,02:00:00,0.219598,0.105035,0.043996,0.106661,0,1,6.112691,46.489116,89.872971,4,0.978482,79.970783,1.109293,0.146079,0.642644,0.029969,0.149031,41.78115,0.175614,0.023423,0.006425,0.356695
3,01-01-2021,03:00:00,0.179598,0.073839,0.275727,0.072209,0,2,30.041088,49.675029,92.759074,5,0.208993,2.267048,0.847219,0.475255,0.54668,1.38495,0.349567,46.078097,0.004072,0.012969,0.08342,0.75864
4,01-01-2021,04:00:00,0.046806,0.068614,0.059824,0.09109,0,2,45.085422,21.166182,83.784529,9,0.805132,96.815797,1.452466,0.319261,1.93585,1.87217,0.128438,17.733986,0.045315,0.004264,0.000476,0.72425


In [43]:
columns_to_drop = ['Date', 'Time']
existing_columns_to_drop = [col for col in columns_to_drop if col in df.columns]

df_preprocessing = df.drop(existing_columns_to_drop, axis=1)


In [44]:
df_preprocessing.describe()

Unnamed: 0,EV Charging Demand (kW),Solar Energy Production (kW),Wind Energy Production (kW),Electricity Price ($/kWh),Grid Availability,Weather Conditions,Battery Storage (kWh),Charging Station Capacity (kW),EV Charging Efficiency (%),Number of EVs Charging,Peak Demand (kW),Renewable Energy Usage (%),Grid Stability Index,Carbon Emissions (kgCO2/kWh),Power Outages (hours),Energy Savings ($),Total Renewable Energy Production (kW),Effective Charging Capacity (kW),Adjusted Charging Demand (kW),Net Energy Cost ($),Carbon Footprint Reduction (kgCO2),Renewable Energy Efficiency (%)
count,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0,29905.0
mean,0.149904,0.14989,0.15018,0.124881,0.052132,2.0001,24.865209,27.538018,90.029451,4.994248,0.497221,50.011813,0.999472,0.299668,0.998223,2.511337,0.30007,24.786523,0.074774,0.0187346,0.02247968,1.720099
std,0.086369,0.086664,0.086476,0.043358,0.222296,1.412261,14.41647,13.019602,5.775245,2.572082,0.288334,28.822138,0.289173,0.115765,0.575196,1.4421,0.122626,11.833579,0.065761,0.01317326,0.02288975,1.540322
min,3e-06,2e-06,3.9e-05,0.050004,0.0,0.0,0.004353,5.001468,80.000418,1.0,2e-06,0.001845,0.50006,0.100016,0.000161,0.000108,0.00094,4.029751,2e-06,3.256815e-07,1.645522e-07,0.004444
25%,0.075396,0.074872,0.075051,0.087015,0.0,1.0,12.375354,16.233951,84.998439,3.0,0.247186,24.881504,0.75008,0.200188,0.501348,1.259527,0.212279,14.505241,0.020307,0.008113422,0.005307018,0.771331
50%,0.150148,0.150008,0.150752,0.125131,0.0,2.0,24.843793,27.594152,90.04651,5.0,0.495635,49.896696,0.999294,0.299934,0.999277,2.527125,0.300235,24.728068,0.056072,0.01626189,0.01483351,1.219171
75%,0.224333,0.225072,0.225735,0.162368,0.0,3.0,37.333133,38.796611,95.03201,7.0,0.747044,74.958632,1.248986,0.40056,1.492273,3.755299,0.387697,34.769205,0.113962,0.02714478,0.0322099,2.078169
max,0.299977,0.299992,0.299998,0.199996,1.0,4.0,49.997999,49.998695,99.999152,9.0,0.999927,99.999461,1.499929,0.499994,1.999974,4.99928,0.598668,49.887808,0.295497,0.05936202,0.1435133,12.994773


In [45]:
df_preprocessing.corr()

Unnamed: 0,EV Charging Demand (kW),Solar Energy Production (kW),Wind Energy Production (kW),Electricity Price ($/kWh),Grid Availability,Weather Conditions,Battery Storage (kWh),Charging Station Capacity (kW),EV Charging Efficiency (%),Number of EVs Charging,Peak Demand (kW),Renewable Energy Usage (%),Grid Stability Index,Carbon Emissions (kgCO2/kWh),Power Outages (hours),Energy Savings ($),Total Renewable Energy Production (kW),Effective Charging Capacity (kW),Adjusted Charging Demand (kW),Net Energy Cost ($),Carbon Footprint Reduction (kgCO2),Renewable Energy Efficiency (%)
EV Charging Demand (kW),1.0,0.00656,-0.003047,0.00385,0.006033,-0.00014,-0.003023,-0.005648,-0.010259,-0.002515,-0.00205,-0.007852,-0.000656,0.001095,0.000118,-0.002528,0.002488,-0.006375,0.651029,0.82221,0.567486,0.004408
Solar Energy Production (kW),0.00656,1.0,0.003232,0.009653,-0.006249,0.005864,0.000826,-0.006034,0.0019,-0.012666,-0.007287,-0.005499,0.006333,-0.00125,0.002136,-0.001048,0.709013,-0.005695,-0.000482,0.011872,0.009719,0.323857
Wind Energy Production (kW),-0.003047,0.003232,1.0,0.000887,0.003693,-0.005213,0.000651,-0.007691,-0.004063,0.008893,0.007612,0.001851,-0.003475,0.009032,4.5e-05,0.008116,0.707483,-0.007954,-9e-06,-0.004171,0.000849,0.326267
Electricity Price ($/kWh),0.00385,0.009653,0.000887,1.0,-0.007808,0.004291,0.012657,0.005329,-0.004153,0.00038,-0.004045,0.007185,0.001395,0.001736,-0.000984,-0.003235,0.007447,0.005002,0.006664,0.496697,0.000703,0.005403
Grid Availability,0.006033,-0.006249,0.003693,-0.007808,1.0,-0.003532,0.003742,0.005987,-0.001171,0.011695,0.004296,-0.007753,-0.002219,-0.003897,0.015546,0.000647,-0.001813,0.005193,-0.002104,0.000477,0.007415,-0.006554
Weather Conditions,-0.00014,0.005864,-0.005213,0.004291,-0.003532,1.0,-0.007555,0.001327,-0.003626,0.010532,-0.011733,0.000571,0.005648,0.00248,-0.006739,-0.002864,0.000468,0.000742,0.006706,0.003179,-0.007431,-0.003084
Battery Storage (kWh),-0.003023,0.000826,0.000651,0.012657,0.003742,-0.007555,1.0,0.000707,-0.007919,0.003472,-0.013524,-0.001557,-0.00014,-0.001326,0.0143,0.006871,0.001043,-0.000345,-0.000707,0.005134,-0.001793,-0.001009
Charging Station Capacity (kW),-0.005648,-0.006034,-0.007691,0.005329,0.005987,0.001327,0.000707,1.0,-0.007718,0.012041,0.00275,0.001475,0.00744,-0.003105,-0.006764,-0.004345,-0.009688,0.988941,-0.003868,-0.006101,-0.004531,-0.693057
EV Charging Efficiency (%),-0.010259,0.0019,-0.004063,-0.004153,-0.001171,-0.003626,-0.007919,-0.007718,1.0,-0.000259,-0.007023,0.006472,0.007575,0.007145,0.007845,0.002248,-0.001522,0.126331,-4e-05,-0.009162,-0.009008,-0.071265
Number of EVs Charging,-0.002515,-0.012666,0.008893,0.00038,0.011695,0.010532,0.003472,0.012041,-0.000259,1.0,-0.003899,-0.004515,0.000968,-0.004302,0.003163,-0.00431,-0.00268,0.012411,-0.003156,-0.002578,-0.003418,-0.012117


# Data visualization and outliers removal

In [47]:
plt.figure(figsize=(20,15))
sns.heatmap(df_preprocessing.corr(),annot= True,linecolor='white',cmap='spring')

<Axes: >

In [48]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29905 entries, 0 to 29904
Data columns (total 24 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Date                                    29905 non-null  object 
 1   Time                                    29905 non-null  object 
 2   EV Charging Demand (kW)                 29905 non-null  float64
 3   Solar Energy Production (kW)            29905 non-null  float64
 4   Wind Energy Production (kW)             29905 non-null  float64
 5   Electricity Price ($/kWh)               29905 non-null  float64
 6   Grid Availability                       29905 non-null  int32  
 7   Weather Conditions                      29905 non-null  int32  
 8   Battery Storage (kWh)                   29905 non-null  float64
 9   Charging Station Capacity (kW)          29905 non-null  float64
 10  EV Charging Efficiency (%)              29905 non-null  fl

In [49]:
import matplotlib.pyplot as mlt
features = ['EV Charging Demand (kW)',
'Solar Energy Production (kW)',
'Wind Energy Production (kW)',
'Electricity Price ($/kWh)',
'Grid Availability',
'Weather Conditions',
'Battery Storage (kWh)',
'Charging Station Capacity (kW)',
'EV Charging Efficiency (%)',
'Number of EVs Charging',
'Peak Demand (kW)',
'Renewable Energy Usage (%)',
'Grid Stability Index',
'Carbon Emissions (kgCO2/kWh)',
'Power Outages (hours)',
'Energy Savings ($)',
'Total Renewable Energy Production (kW)',
'Effective Charging Capacity (kW)',
'Adjusted Charging Demand (kW)',
'Net Energy Cost ($)',
'Carbon Footprint Reduction (kgCO2)',
'Renewable Energy Efficiency (%)'
           ]
df_without_outliers = df.copy()

for feature in features:
    plt.figure(figsize=(20, 10))
    plt.subplot(1, 2, 1)
    plt.boxplot(df[feature])
    plt.title(f"With outliers: {feature}")

    plt.subplot(1, 2, 2)
    IQ1 = df[feature].quantile(0.25)
    IQ3 = df[feature].quantile(0.75)
    IQR = IQ3 - IQ1
    lower = IQ1 - 1.5 * IQR
    upper = IQ3 + 1.5 * IQR
    
    # Filtering outliers
    outlier_condition = (df[feature] < lower) | (df[feature] > upper)
    df_without_outliers = df_without_outliers[~outlier_condition]

    plt.boxplot(df_without_outliers[feature])
    plt.title(f"Without outliers: {feature}")
    plt.show()

# df_without_outliers now contains the data without outliers
print("New DataFrame shape after removing outliers:", df_without_outliers.shape)

  plt.show()
  plt.show()
  plt.show()
  plt.show()
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_withou

New DataFrame shape after removing outliers: (24406, 24)


  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()
  df_without_outliers = df_without_outliers[~outlier_condition]
  plt.show()


In [50]:
df_without_outliers.info()

<class 'pandas.core.frame.DataFrame'>
Index: 24406 entries, 0 to 29904
Data columns (total 24 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Date                                    24406 non-null  object 
 1   Time                                    24406 non-null  object 
 2   EV Charging Demand (kW)                 24406 non-null  float64
 3   Solar Energy Production (kW)            24406 non-null  float64
 4   Wind Energy Production (kW)             24406 non-null  float64
 5   Electricity Price ($/kWh)               24406 non-null  float64
 6   Grid Availability                       24406 non-null  int32  
 7   Weather Conditions                      24406 non-null  int32  
 8   Battery Storage (kWh)                   24406 non-null  float64
 9   Charging Station Capacity (kW)          24406 non-null  float64
 10  EV Charging Efficiency (%)              24406 non-null  float64

In [51]:
solar_energy = df['Solar Energy Production (kW)'].iloc[0]
wind_energy = df['Wind Energy Production (kW)'].iloc[0]

# Prepare data for the pie chart
labels = ['Solar Energy', 'Wind Energy']
sizes = [solar_energy, wind_energy]

# Plot pie chart
plt.figure(figsize=(4, 4))
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140, colors=['#ffcc33', '#66b3ff'])
plt.title('Total Renewable Energy Production Breakdown')
plt.show()

  plt.show()


In [52]:
median_ev_demand = df['EV Charging Demand (kW)'].median()

# Create a new column for color based on the EV Charging Demand
df['Color'] = ['above' if x > median_ev_demand else 'below' for x in df['EV Charging Demand (kW)']]

# Set color palette
palette = {'above': 'blue', 'below': 'orange'}

# Create scatter plot with different colors
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='EV Charging Demand (kW)', y='Electricity Price ($/kWh)', hue='Color', palette=palette, s=100)
plt.title('Scatter Plot of EV Charging Demand vs Electricity Price')
plt.xlabel('EV Charging Demand (kW)')
plt.ylabel('Electricity Price ($/kWh)')
plt.grid(True)
plt.legend(title='EV Demand', loc='upper left')
plt.show()

  plt.show()


# Regression Analysis of Factors Influencing EV Charging Efficiency: A Comprehensive Model Evaluation

In [56]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt


# Define input features (X) and target variable (y)
X = df_without_outliers[['EV Charging Demand (kW)',
    'Grid Availability',
    'Grid Stability Index',
    'Weather Conditions',
    'Battery Storage (kWh)',
    'Number of EVs Charging',
    'Peak Demand (kW)',
    'Power Outages (hours)',
    'Charging Station Capacity (kW)',
    'Effective Charging Capacity (kW)',
    'Total Renewable Energy Production (kW)',
    'Renewable Energy Usage (%)']]

y = df_without_outliers['EV Charging Efficiency (%)']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Predict on the test set
y_pred = lr_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Plotting Actual vs Predicted Adjusted Charging Demand
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='blue', edgecolors='k', alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('Actual EV Charging Efficiency (%)')
plt.ylabel('Predicted EV Charging Efficiency (%)')
plt.title('Actual vs Predicted EV Charging Efficiency (%) using LinearRegression')
plt.grid(True)
plt.show()
import pickle
with open('lr_model.pkl', 'wb') as file:
    pickle.dump(lr_model, file)


Mean Squared Error: 4.93
R² Score: 0.85


  plt.show()


# GUI Application for Predicting Electric Vehicle Charging Efficiency Using Linear Regression


In [2]:
import tkinter as tk
from tkinter import messagebox
import pandas as pd
from sklearn.linear_model import LinearRegression

# Assuming df_without_outliers is already defined and lr_model is trained

# Function to make predictions
def predict_efficiency():
    try:
        # Get user inputs from the form
        inputs = [
            float(entry_ev_demand.get()),
            float(entry_grid_availability.get()),
            float(entry_grid_stability.get()),
            float(entry_weather_conditions.get()),
            float(entry_battery_storage.get()),
            float(entry_number_of_evs.get()),
            float(entry_peak_demand.get()),
            float(entry_power_outages.get()),
            float(entry_charging_station_capacity.get()),
            float(entry_effective_charging_capacity.get()),
            
            float(entry_Total_Renewable_Energy_Production.get()),
            float(entry_renewable_energy_usage.get())
        ]

        # Create DataFrame for prediction
        input_df = pd.DataFrame([inputs], columns=X.columns)

        # Make prediction using the trained model
        prediction = lr_model.predict(input_df)

        # Display the prediction result in a messagebox
        messagebox.showinfo("Prediction Result", f"Predicted EV Charging Efficiency: {prediction[0]:.2f}%")

    except ValueError:
        messagebox.showerror("Input Error", "Please enter valid numerical values.")

# Create the main window
root = tk.Tk()
root.title("EV Charging Efficiency Predictor")

# Input field labels
labels = [
    'EV Charging Demand (kW)',
    'Grid Availability',
    'Grid Stability Index',
    'Weather Conditions',
    'Battery Storage (kWh)',
    'Number of EVs Charging',
    'Peak Demand (kW)',
    'Power Outages (hours)',
    'Charging Station Capacity (kW)',
    'Effective Charging Capacity (kW)',
    'Total Renewable Energy Production (kW)',
    'Renewable Energy Usage (%)'
]

# Create input fields dynamically
entries = []
for label in labels:
    tk.Label(root, text=label).pack()
    entry = tk.Entry(root)
    entry.pack()
    entries.append(entry)

# Assign each entry to a variable for easy access
(
    entry_ev_demand, entry_grid_availability, entry_grid_stability,
    entry_weather_conditions, entry_battery_storage, entry_number_of_evs,
    entry_peak_demand, entry_power_outages, entry_charging_station_capacity,
    entry_effective_charging_capacity,entry_Total_Renewable_Energy_Production,
    entry_renewable_energy_usage
) = entries

# Predict button
predict_button = tk.Button(root, text="Predict Efficiency", command=predict_efficiency)
predict_button.pack()

# Run the application
root.mainloop()


# Predicted EV Charging Efficiency (%) using Polynomialregression

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

X = df_without_outliers[['EV Charging Demand (kW)',
    'Grid Availability',
    'Grid Stability Index',
    'Weather Conditions',
    'Battery Storage (kWh)',
    'Number of EVs Charging',
    'Peak Demand (kW)',
    'Power Outages (hours)',
    'Charging Station Capacity (kW)',
    'Effective Charging Capacity (kW)',
    'Total Renewable Energy Production (kW)',
    'Renewable Energy Usage (%)'
]]

## EV Charging Efficiency (%): The efficiency of the EV charging process, expressed as a percentage.

y = df_without_outliers['EV Charging Efficiency (%)']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create polynomial features
#degree = 2  # You can change the degree based on your requirements
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Fit the polynomial regression model
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Predict on the test set
y_pred = poly_model.predict(X_test_poly)

# Calculate Mean Squared Error and R² Score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Plotting Actual vs Predicted
# Plotting Actual vs Predicted Adjusted Charging Demand
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='orange', edgecolors='k', alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('Actual EV Charging Efficiency (%)')
plt.ylabel('Predicted EV Charging Efficiency (%)')
plt.title('Actual vs Predicted EV Charging Efficiency (%) using PolynomialFeatures')
plt.grid(True)
plt.show()


# Predicted EV Charging Efficiency (%) using DecisionTreeRegressor

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor  # Importing Decision Tree Regressor
from sklearn.metrics import mean_squared_error, r2_score

# Assuming df_without_outliers is defined and contains the relevant columns
X = df_without_outliers[['EV Charging Demand (kW)',
    'Grid Availability',
    'Grid Stability Index',
    'Weather Conditions',
    'Battery Storage (kWh)',
    'Number of EVs Charging',
    'Peak Demand (kW)',
    'Power Outages (hours)',
    'Charging Station Capacity (kW)',
    'Effective Charging Capacity (kW)',
    'Solar Energy Production (kW)',
    'Wind Energy Production (kW)',
    'Renewable Energy Usage (%)'
]]

# Target variable for prediction
y = df_without_outliers['EV Charging Efficiency (%)']

# Splitting the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree Regressor
dt_model = DecisionTreeRegressor(random_state=42)  

#dt_model = DecisionTreeRegressor( criterion='squared_error',splitter='best',max_depth=65,random_state=42)
dt_model.fit(X_train, y_train)

# Predict on the test set
y_pred = dt_model.predict(X_test)

# Calculate Mean Squared Error and R² Score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print evaluation metrics
print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Plotting Actual vs Predicted
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='blue', edgecolors='k', alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('Actual EV Charging Efficiency (%)')
plt.ylabel('Predicted EV Charging Efficiency (%)')
plt.title('Actual vs Predicted EV Charging Efficiency (%) using Decision Tree Regressor')
plt.grid(True)
plt.show()


# Predicting Net Energy Cost ($) for  Electric Vehicle Charging Using Random Forest Regression

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Define input features (X) and target variable (y)
X = df_without_outliers[['EV Charging Demand (kW)',
'Solar Energy Production (kW)',
'Wind Energy Production (kW)',
'Electricity Price ($/kWh)',
'Grid Availability',
'Weather Conditions',
'Battery Storage (kWh)',
'Charging Station Capacity (kW)',
'EV Charging Efficiency (%)',
'Number of EVs Charging',
'Peak Demand (kW)',
'Renewable Energy Usage (%)',
'Grid Stability Index',
'Carbon Emissions (kgCO2/kWh)',
'Power Outages (hours)',
'Energy Savings ($)',
'Total Renewable Energy Production (kW)',
'Effective Charging Capacity (kW)',
'Adjusted Charging Demand (kW)'
#'Net Energy Cost ($)'
                        ]]

y = df_without_outliers['Net Energy Cost ($)']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=30, random_state=42)
#rf_model = RandomForestRegressor(n_estimators=30, max_depth=8, min_samples_split=2, min_samples_leaf=2, random_state=42)

# Fit the model to the training data
rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Plotting Actual vs Predicted Renewable Energy Usage (%)
plt.figure(figsize=(8, 6))
plt.scatter(y_test, y_pred, color='blue', edgecolors='k', alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.xlabel('Actual Net Energy Cost ($)')
plt.ylabel('Predicted Net Energy Cost ($)')
plt.title('Actual vs Predicted Net Energy Cost ($) using Random Forest')
plt.grid(True)
plt.show()


In [3]:
pip install ipywidgets


Collecting fqdn (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.6->ipywidgets)
  Downloading fqdn-1.5.1-py3-none-any.whl.metadata (1.4 kB)
Collecting isoduration (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.6->ipywidgets)
  Downloading isoduration-20.11.0-py3-none-any.whl.metadata (5.7 kB)
Collecting uri-template (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.6->ipywidgets)
  Downloading uri_template-1.3.0-py3-none-any.whl.metadata (8.8 kB)
Collecting webcolors>=24.6.0 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.6->ipywidgets)
  Downloading webcolors-24.8.0-py3-none-any.whl.metadata (2.6 kB)
Downloading webcolors-24.8.0-py3-none-any.whl (15 kB)
Downloa

In [5]:
import streamlit as st
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# Assuming lr_model is already trained and X.columns is available

# Function to make predictions
def predict_efficiency():
    try:
        # Get user inputs from the form
        inputs = [
            st.number_input('EV Charging Demand (kW)', value=0.0),
            st.number_input('Grid Availability', value=0.0),
            st.number_input('Grid Stability Index', value=0.0),
            st.number_input('Weather Conditions', value=0.0),
            st.number_input('Battery Storage (kWh)', value=0.0),
            st.number_input('Number of EVs Charging', value=0.0),
            st.number_input('Peak Demand (kW)', value=0.0),
            st.number_input('Power Outages (hours)', value=0.0),
            st.number_input('Charging Station Capacity (kW)', value=0.0),
            st.number_input('Effective Charging Capacity (kW)', value=0.0),
            st.number_input('Total Renewable Energy Production (kW)', value=0.0),
            st.number_input('Renewable Energy Usage (%)', value=0.0)
        ]

        # Create DataFrame for prediction
        input_df = pd.DataFrame([inputs], columns=X.columns)

        # Make prediction using the trained model
        prediction = lr_model.predict(input_df)

        # Display the prediction result
        st.success(f"Predicted EV Charging Efficiency: {prediction[0]:.2f}%")

    except ValueError:
        st.error("Please enter valid numerical values.")

# Streamlit app title
st.title("EV Charging Efficiency Predictor")

# Predict button
if st.button("Predict Efficiency"):
    predict_efficiency()

2024-11-06 18:40:20.183 
  command:

    streamlit run C:\Users\vishn\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
