<a href="https://colab.research.google.com/github/workingbetter/ITNPBD5_Dissertation/blob/main/ML_discharge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import os
import pandas as pd

def extract_features(file_path, trip_name):
    """Summarise a single trip CSV as a one-row DataFrame of features.

    The file is read as a semicolon-delimited, latin-1 encoded CSV. Rows
    that have a missing value in any of the four columns used below are
    discarded before the summary is computed.

    Parameters
    ----------
    file_path : str
        Path of the trip CSV file.
    trip_name : str
        Label stored in the 'Trip' column of the result.

    Returns
    -------
    pandas.DataFrame or None
        One row with the trip label, trip time in minutes (last remaining
        'Time [s]' value divided by 60), mean velocity, first and last SoC,
        and mean ambient temperature. None when no rows remain after the
        NaN filtering (a message is printed in that case).
    """
    # Columns the summary depends on; NaNs elsewhere in the file are tolerated.
    required = ['Time [s]', 'Velocity [km/h]', 'SoC [%]', 'Ambient Temperature [°C]']

    trip = (
        pd.read_csv(file_path, delimiter=';', encoding='latin-1')
        .dropna(subset=required)
        .reset_index(drop=True)
    )

    # Nothing left to summarise for this trip.
    if trip.empty:
        print(f"Skipped {trip_name} as it's empty after dropping NaN rows.")
        return None

    soc = trip['SoC [%]']
    summary = {
        'Trip': trip_name,
        # Last remaining timestamp, converted from seconds to minutes.
        'Trip Time [min]': trip['Time [s]'].iloc[-1] / 60,
        'Average Velocity [km/h]': trip['Velocity [km/h]'].mean(),
        'Initial SoC [%]': soc.iloc[0],
        'Final SoC [%]': soc.iloc[-1],
        'Average Ambient Temperature [°C]': trip['Ambient Temperature [°C]'].mean(),
    }

    # Wrap each scalar in a list so the frame has exactly one row.
    return pd.DataFrame({column: [value] for column, value in summary.items()})



# Folder holding one semicolon-delimited CSV file per recorded trip.
folder_path = "/content/drive/MyDrive/ITNPBD5/Measurement_Data/Trips"

# Sorted so the row order of master_df is stable from run to run.
all_files = sorted(
    os.path.join(folder_path, file)
    for file in os.listdir(folder_path)
    if file.endswith('.csv')
)

# Gather the one-row frames first and concatenate once at the end; calling
# pd.concat inside the loop re-copies the accumulated frame on every pass.
trip_frames = []

for file in all_files:
    # splitext removes only the trailing extension, whereas
    # str.replace('.csv', '') would strip every '.csv' inside the name.
    trip_name = os.path.splitext(os.path.basename(file))[0]
    trip_features = extract_features(file, trip_name)
    # extract_features returns None for trips with no usable rows.
    if trip_features is not None:
        trip_frames.append(trip_features)

# Fail with a clear message instead of an opaque KeyError on the
# derived-column calculations below.
if not trip_frames:
    raise ValueError(f"No usable trip CSV files found in {folder_path}")

master_df = pd.concat(trip_frames, ignore_index=True)

# Calculate distance (Km) for each trip as duration [h] x mean velocity [km/h].
# NOTE(review): this is exact only if the velocity samples are evenly spaced
# in time and the time column starts at 0 -- confirm against the raw data.
master_df['Distance (Km)'] = (master_df['Trip Time [min]'] / 60) * master_df['Average Velocity [km/h]']

# Calculate discharge rate (%/km) for each trip. The numerator is
# Final - Initial, so a trip that drains the battery gets a negative rate.
# A zero-distance trip produces inf/NaN here rather than raising.
master_df['Discharge Rate (%/km)'] = (master_df['Final SoC [%]'] - master_df['Initial SoC [%]']) / master_df['Distance (Km)']




In [6]:
# Show the per-trip feature table built above; the rich display elides the
# middle rows of a long frame, so only the head and tail appear in the output.
display(master_df)

Unnamed: 0,Trip,Trip Time [min],Average Velocity [km/h],Initial SoC [%],Final SoC [%],Average Ambient Temperature [°C],Distance (Km),Discharge Rate (%/km)
0,TripA01,16.815000,26.502170,86.9,81.5,30.769972,7.427233,-0.727054
1,TripA02,23.548333,59.909267,80.3,67.3,31.127573,23.512723,-0.552892
2,TripA03,11.175000,68.826493,83.5,75.1,23.334916,12.818934,-0.655281
3,TripA04,6.871667,93.645415,75.1,66.7,24.423957,10.725001,-0.783217
4,TripA05,22.776667,32.646337,66.7,60.2,24.587443,12.392912,-0.524493
...,...,...,...,...,...,...,...,...
63,TripB32,13.258333,64.438389,52.6,38.1,2.193433,14.239094,-1.018323
64,TripB33,9.131667,46.205445,77.4,71.6,4.194641,7.032212,-0.824776
65,TripB35,22.706667,40.740879,85.4,71.5,7.618703,15.418159,-0.901534
66,TripB36,47.533333,48.871933,72.1,44.5,7.201695,38.717431,-0.712857


In [7]:
# Persist the per-trip feature table to Drive for later use.
# NOTE: with the default index=True the row index is written as an unnamed
# first column; read it back with index_col=0, or it will show up as
# 'Unnamed: 0'.
master_df.to_csv("/content/drive/MyDrive/ITNPBD5/misc/master_data2.csv")