In [42]:
import pandas as pd
from sklearn.metrics import r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# Load the dataset
df = pd.read_csv('IEA-EV-dataEV_salesCarsHistorical.csv')
data=df.drop(['category','parameter','mode','unit'], axis=1)

# converting value column to a more usable log.
data['log_value'] = data['value'].apply(lambda x: np.log(x))
df=data

encoded_data = pd.get_dummies(data, columns=['region', 'powertrain'])
encoded_data
x = encoded_data.drop(['log_value','value'], axis=1)
y = encoded_data[['log_value']]

In [43]:
#using X_train,Y_train  instead of x,y 

# Scale the input features
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x)

In [44]:
# Build the neural network model
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=x_train_scaled.shape[1]))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')


In [45]:
# Train the model
model.fit(x_train_scaled, y, epochs=100, batch_size=32, verbose=0)

<keras.callbacks.History at 0x226e176faf0>

In [46]:
# Evaluate the model
train_predictions = model.predict(x_train_scaled)



In [47]:
from sklearn import metrics
# Create DataFrame of predicted values
train_prediction = pd.DataFrame(train_predictions, columns=['log_values'])
# Converting back to usable values from log_values using exp
train_prediction['value'] = train_prediction['log_values'].apply(lambda x: np.exp(x)).astype(int)
# R squared Error
error_score = metrics.r2_score(df['value'], train_prediction['value'])
print("R squared Error: ", error_score)


R squared Error:  0.977221038846367


In [48]:
print(train_prediction['value'])

0           69
1          128
2           82
3          198
4          216
        ...   
829    1215153
830    1802884
831    4819932
832    7552347
833    2892738
Name: value, Length: 834, dtype: int32


In [49]:
print(df['value'])

0           49
1          170
2           80
3          100
4          190
        ...   
829     970000
830    1900000
831    4600000
832    7300000
833    2900000
Name: value, Length: 834, dtype: int64


In [50]:
# Extract the latest year from the dataset
latest_year = data['year'].max()

# Create a new DataFrame with future year, region, and powertrain
future_data = pd.DataFrame({
    'year': [latest_year + 1, latest_year + 2, latest_year + 3],
    'region': [data['region'].unique()[0]] * 3,
    'powertrain': [data['powertrain'].unique()[0]] * 3
})

In [55]:
# Perform one-hot encoding on the future data using the original encoder
encoded_future_data = pd.get_dummies(future_data, columns=['region', 'powertrain'])

encoded_future_data = encoded_future_data.reindex(columns=encoded_data.columns[:], fill_value=0)
encoded_future_data

Unnamed: 0,year,value,log_value,region_Australia,region_Austria,region_Belgium,region_Brazil,region_Canada,region_Chile,region_China,...,region_South Africa,region_Spain,region_Sweden,region_Switzerland,region_Turkiye,region_USA,region_United Kingdom,region_World,powertrain_BEV,powertrain_PHEV
0,2023,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,2024,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2025,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [56]:
# Remove the 'value' column from the encoded future data
encoded_future_data = encoded_future_data.drop(['value','log_value'], axis=1)

In [57]:
# Predict the values for future years
future_prediction = model.predict(encoded_future_data)



In [58]:
# Create DataFrame of predicted values
future_prediction = pd.DataFrame(future_prediction, columns=['log_values'])
# Converting back to usable values from log_values using exp
future_prediction['value'] = future_prediction['log_values'].apply(lambda x: np.exp(x))

In [59]:
future_prediction

Unnamed: 0,log_values,value
0,6226.835938,inf
1,6229.914062,inf
2,6232.991699,inf
