<a href="https://colab.research.google.com/github/workingbetter/ITNPBD5_Dissertation/blob/main/standard_scalar(keras).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import os
import pandas as pd

def extract_features(file_path, trip_name):
    df = pd.read_csv(file_path, delimiter=';', encoding='latin-1')

    # List of necessary columns for the analysis
    necessary_columns = ['Time [s]', 'Velocity [km/h]', 'SoC [%]', 'Ambient Temperature [°C]']

    # Drop rows with NaN values only in the necessary columns
    df.dropna(subset=necessary_columns, inplace=True)

    # Reset index after dropping rows
    df.reset_index(drop=True, inplace=True)

    # If dataframe is empty after dropping NaN values, return None
    if df.empty:
        print(f"Skipped {trip_name} as it's empty after dropping NaN rows.")
        return None

    trip_time = df['Time [s]'].iloc[-1] / 60  # Convert from seconds to minutes
    avg_velocity = df['Velocity [km/h]'].mean()
    initial_soc = df['SoC [%]'].iloc[0]  # First entry
    final_soc = df['SoC [%]'].iloc[-1]  # Last entry
    avg_ambient_temp = df['Ambient Temperature [°C]'].mean()

    return pd.DataFrame({
        'Trip': [trip_name],
        'Trip Time [min]': [trip_time],  # Updated column name
        'Average Velocity [km/h]': [avg_velocity],
        'Initial SoC [%]': [initial_soc],
        'Final SoC [%]': [final_soc],
        'Average Ambient Temperature [°C]': [avg_ambient_temp]
    })



folder_path = "/content/drive/MyDrive/ITNPBD5/Measurement_Data/Trips"

all_files = sorted([os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.csv')])

master_df = pd.DataFrame()  # Empty dataframe to store all the extracted features

for file in all_files:
    trip_name = os.path.basename(file).replace('.csv', '')  # Extracting the file name without extension
    trip_features = extract_features(file, trip_name)
    master_df = pd.concat([master_df, trip_features], ignore_index=True)

# Calculate distance (Km) for each trip
master_df['Distance (Km)'] = (master_df['Trip Time [min]'] / 60) * master_df['Average Velocity [km/h]']

# Calculate discharge rate (%/km) for each trip
master_df['Discharge Rate (%/km)'] = (master_df['Final SoC [%]'] - master_df['Initial SoC [%]']) / master_df['Distance (Km)']

# Calculate the delta_SoC
master_df['delta_SoC'] = master_df['Final SoC [%]'] - master_df['Initial SoC [%]']


In [15]:
display(master_df)

Unnamed: 0,Trip,Trip Time [min],Average Velocity [km/h],Initial SoC [%],Final SoC [%],Average Ambient Temperature [°C],Distance (Km),Discharge Rate (%/km),delta_SoC
0,TripA01,16.815000,26.502170,86.9,81.5,30.769972,7.427233,-0.727054,-5.4
1,TripA02,23.548333,59.909267,80.3,67.3,31.127573,23.512723,-0.552892,-13.0
2,TripA03,11.175000,68.826493,83.5,75.1,23.334916,12.818934,-0.655281,-8.4
3,TripA04,6.871667,93.645415,75.1,66.7,24.423957,10.725001,-0.783217,-8.4
4,TripA05,22.776667,32.646337,66.7,60.2,24.587443,12.392912,-0.524493,-6.5
...,...,...,...,...,...,...,...,...,...
63,TripB32,13.258333,64.438389,52.6,38.1,2.193433,14.239094,-1.018323,-14.5
64,TripB33,9.131667,46.205445,77.4,71.6,4.194641,7.032212,-0.824776,-5.8
65,TripB35,22.706667,40.740879,85.4,71.5,7.618703,15.418159,-0.901534,-13.9
66,TripB36,47.533333,48.871933,72.1,44.5,7.201695,38.717431,-0.712857,-27.6


In [12]:
master_df.to_csv("/content/drive/MyDrive/ITNPBD5/misc/master_data3.csv")

In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Extracting features and target
X = master_df[['Trip Time [min]', 'Average Velocity [km/h]', 'Initial SoC [%]', 'Average Ambient Temperature [°C]', 'Distance (Km)']]
y = master_df['delta_SoC']

# Splitting data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3071806)

# Keep a copy of the original X_test for 'Initial SoC [%]'
X_test_original = X_test.copy()

# Scaling the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Building the neural network model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))

# Compiling the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Training the model
model.fit(X_train, y_train, epochs=500, batch_size=10, validation_split=0.2)

# Predicting on the test set
y_pred_delta = model.predict(X_test).flatten()

# Adjust the predictions to get final SoC
y_pred_final = y_pred_delta + X_test_original['Initial SoC [%]'].values

# Calculate the Mean Squared Error using adjusted predictions
mse = mean_squared_error(y_test + X_test_original['Initial SoC [%]'].values, y_pred_final)
print("Mean Squared Error for Neural Network:", mse)


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [38]:
# Predicting using new input data
trip_time = 23.54
avg_velocity = 59.9
initial_soc = 80.3
avg_ambient_temp = 31

distance = (trip_time / 60) * avg_velocity

input_data = pd.DataFrame({
    'Trip Time [min]': [trip_time],
    'Average Velocity [km/h]': [avg_velocity],
    'Initial SoC [%]': [initial_soc],
    'Average Ambient Temperature [°C]': [avg_ambient_temp],
    'Distance (Km)': [distance]
})

# Scale the input data
input_data = scaler.transform(input_data)

# Predict delta_SoC using the trained neural network
predicted_delta_soc = model.predict(input_data).flatten()[0]

# Adjust the prediction to get final SoC
predicted_final_soc = predicted_delta_soc + initial_soc
print(f"Predicted Final SoC [%]: {predicted_final_soc:.2f}")



Predicted Final SoC [%]: 67.59
