In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the four event tables (A-D) from the datasets directory.
df_A_events, df_B_events, df_C_events, df_D_events = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../datasets/df_A_events.csv',
        '../datasets/df_B_events.csv',
        '../datasets/df_C_events.csv',
        '../datasets/df_D_events.csv',
    )
)

# Collected in one list so later cells can apply the same steps to every table.
csvs = [df_A_events, df_B_events, df_C_events, df_D_events]

In [3]:
def _min_max_scale(values):
    """Linearly rescale a numeric Series onto [0, 1].

    Returns ``(values - min) / (max - min)``. When every value is identical
    the range is zero and that formula would fill the column with NaN, so
    such a Series is mapped to 0.0 instead.
    """
    low = values.min()
    span = values.max() - low
    if span == 0:
        # Every entry equals ``low``: return zeros rather than 0/0 = NaN.
        return (values - low).astype(float)
    return (values - low) / span


def _add_event_features(events):
    """Add time, rate and ratio columns to ``events`` in place.

    Reads 'Start time', 'End time', 'Packet speed', 'Data speed' and
    'Avg packet len'. As the last step 'Packet speed' and 'Data speed' are
    overwritten with their min-max normalised values.
    """
    #TIME FEATURES
    events['Start time'] = pd.to_datetime(events['Start time'])
    events['End time'] = pd.to_datetime(events['End time'])
    events['Hour'] = events['Start time'].dt.hour
    # Encode the hour as an angle on a 24-hour circle (sin/cos pair).
    hour_angle = events['Hour'] * (2. * np.pi / 24)
    events['Hour sin'] = np.sin(hour_angle)
    events['Hour cos'] = np.cos(hour_angle)
    events['Day of Week'] = events['Start time'].dt.dayofweek
    events['Duration'] = (events['End time'] - events['Start time']).dt.total_seconds()
    #events under 1 second would be 0 duration, so we set them to 0.5
    # (this also keeps the divisions by 'Duration' below away from zero)
    events.loc[events['Duration'] == 0, 'Duration'] = 0.5

    #DERIVED FEATURES
    # These must be computed BEFORE the normalisation step: they read the raw
    # speeds. NOTE: a 'Packet speed' of 0 yields inf/NaN in the two ratios.
    events['Packet Rate'] = events['Packet speed'] / events['Duration']
    events['Data Rate'] = events['Data speed'] / events['Duration']
    events['Packet Size to Speed Ratio'] = events['Avg packet len'] / events['Packet speed']
    events['Data to Packet Ratio'] = events['Data speed'] / events['Packet speed']

    #NORMALIZE
    events['Packet speed'] = _min_max_scale(events['Packet speed'])
    events['Data speed'] = _min_max_scale(events['Data speed'])


# NOTE: this cell mutates the loaded frames in place. Re-running it without
# first re-running the load cell recomputes the rate/ratio features from the
# already-normalised speeds, so reload the CSVs before running it again.
for df in csvs:
    _add_event_features(df)


# Columns used as model/analysis features by the cells that follow.
feature_cols = ['Packet speed', 'Data speed', 'Packet Rate', 'Data Rate', 'Packet Size to Speed Ratio', 'Data to Packet Ratio','Duration', 'Hour sin', 'Hour cos', 'Day of Week']


 

In [4]:
# Preview the engineered feature columns for the first rows of dataset A.
df_A_events.loc[:, feature_cols].head()

Unnamed: 0,Packet speed,Data speed,Packet Rate,Data Rate,Packet Size to Speed Ratio,Data to Packet Ratio,Duration,Hour sin,Hour cos,Day of Week
0,0.012829,0.035732,55600.0,73.0,0.024874,0.001313,1.0,-1.0,-1.83697e-16,0
1,0.015076,0.044053,127000.0,180.0,0.023717,0.001417,0.5,-1.0,-1.83697e-16,0
2,0.014848,0.040137,62700.0,82.0,0.022313,0.001308,1.0,-1.0,-1.83697e-16,0
3,0.013938,0.041605,59500.0,85.0,0.024975,0.001429,1.0,-1.0,-1.83697e-16,0
4,0.019272,0.055311,1397.321429,2.017857,0.019399,0.001444,56.0,-1.0,-1.83697e-16,0
