In [1]:
import os, sys
import pandas as pd

In [2]:
# Directory holding the raw dataset; only CSV files directly inside it are used.
folder_path = '/home/ubuntu/Maverick/can-fd/CAN-FD_Intrusion_Dataset/'
# Bare file names (no directory part), in whatever order os.listdir yields them.
csv_files = list(filter(lambda entry: entry.endswith('.csv'), os.listdir(folder_path)))

In [3]:
# Show which CSV file names were found in folder_path before processing them.
print(csv_files)

['CANFD_Malfunction.csv', 'CANFD_Flooding.csv', 'CANFD_Fuzzing.csv']


In [4]:
def process_csv(filename, input_dir=None, output_dir=None, max_data_len=64):
    """Convert one raw CSV capture into a fixed-width, per-ID time-delta CSV.

    Each non-blank input line is expected to look like
    ``timestamp,arbitration_id_hex,dlc,<dlc hex data bytes...>,flag``.
    For every line the function:

    * parses the timestamp as float, the arbitration ID as a hex integer and
      the DLC as a decimal integer;
    * converts the ``dlc`` hex data fields to integers and left-pads them with
      zeros so every row has exactly ``max_data_len`` data values;
    * maps the flag field to ``Label`` = 1 when it equals ``'T'``, else 0.

    Rows are then ordered by arbitration ID and, within each ID, by timestamp.
    ``Timestamp`` is replaced with ``dTIME``: the difference to the previous
    row with the same arbitration ID (0 for the first row of each ID). The
    result is written to ``processed_<filename>`` without an index column,
    with columns ``dTIME, Arbitration_ID, Data[0..max_data_len-1], Label``.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside ``input_dir``.
    input_dir : str, optional
        Directory containing ``filename``. Defaults to the notebook-level
        ``folder_path``.
    output_dir : str, optional
        Directory for the output file. Defaults to the current working
        directory.
    max_data_len : int, optional
        Number of data columns every row is padded to (default 64).

    Raises
    ------
    ValueError
        If a line has too few fields for its DLC, or its DLC is outside
        ``0..max_data_len``.
    """
    if input_dir is None:
        input_dir = folder_path  # notebook-level default location
    source_path = os.path.join(input_dir, filename)
    output_name = "processed_" + filename
    target_path = output_name if output_dir is None else os.path.join(output_dir, output_name)

    print("Dataset Processing: ", filename)

    # The schema is fixed up front so it never depends on what the file contains.
    data_columns = [f'Data[{i}]' for i in range(max_data_len)]
    output_columns = ['dTIME', 'Arbitration_ID'] + data_columns + ['Label']

    processed_rows = []
    with open(source_path, 'r') as f:
        # Stream the file line by line; no intermediate DataFrame is needed.
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            fields = line.split(',')
            if len(fields) < 3:
                raise ValueError(
                    f"{filename}: line {line_no} has {len(fields)} field(s), expected at least 3"
                )
            timestamp = float(fields[0])
            arb_id = int(fields[1], 16)
            dlc = int(fields[2])
            if not 0 <= dlc <= max_data_len:
                raise ValueError(
                    f"{filename}: line {line_no} has DLC {dlc}, expected 0..{max_data_len}"
                )
            # timestamp, id, dlc, <dlc data fields>, flag
            if len(fields) < 3 + dlc + 1:
                raise ValueError(
                    f"{filename}: line {line_no} is truncated for DLC {dlc}"
                )
            payload = [int(val, 16) for val in fields[3:3 + dlc]]
            # Left-pad with zeros so the real bytes occupy the last columns.
            padded = [0] * (max_data_len - dlc) + payload
            label = 1 if fields[3 + dlc] == 'T' else 0
            processed_rows.append([timestamp, arb_id] + padded + [label])

    if processed_rows:
        processed_df = pd.DataFrame(
            processed_rows,
            columns=['Timestamp', 'Arbitration_ID'] + data_columns + ['Label'],
        )
        # One sort replaces the per-group sort + concat: groups come out in
        # ascending ID order, rows inside a group in ascending time order.
        processed_df = processed_df.sort_values(by=['Arbitration_ID', 'Timestamp'])
        # diff() restarts at every group boundary; the first row of each ID
        # has no predecessor and gets 0.
        processed_df['dTIME'] = (
            processed_df.groupby('Arbitration_ID')['Timestamp'].diff().fillna(0)
        )
        result_df = processed_df[output_columns]
    else:
        # Empty input still yields a well-formed, header-only output file.
        result_df = pd.DataFrame(columns=output_columns)

    result_df.to_csv(target_path, index=False)
    print("Dataset Processed: ", filename)
    

In [5]:
# Process every discovered CSV in turn. Each call prints a start and a finish
# message and writes "processed_<name>" relative to the current working directory.
for file in csv_files:
    process_csv(file)

Dataset Processing:  CANFD_Malfunction.csv
Dataset Processed:  CANFD_Malfunction.csv
Dataset Processing:  CANFD_Flooding.csv
Dataset Processed:  CANFD_Flooding.csv
Dataset Processing:  CANFD_Fuzzing.csv
Dataset Processed:  CANFD_Fuzzing.csv
