In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import folium
from scipy.stats import mode

In [2]:
def read_data(path='data/raw/'):
    """Load the five per-location sensor recordings from one directory.

    Parameters
    ----------
    path : str or os.PathLike, default 'data/raw/'
        Directory holding ``Garage.csv``, ``Home.csv``, ``Drive.csv``,
        ``Class.csv`` and ``Campus.csv``. A trailing separator is optional.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(garage, home, drive, classroom, campus)``, in that order.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when one of the files is missing.
    """
    import os  # local import: keeps the function independent of the import cell

    # os.path.join adds the separator only when it is missing. Plain string
    # concatenation turned 'data/raw/S1' into 'data/raw/S1Garage.csv'.
    file_names = ('Garage.csv', 'Home.csv', 'Drive.csv', 'Class.csv', 'Campus.csv')
    return tuple(pd.read_csv(os.path.join(path, name)) for name in file_names)

In [3]:
# Load the five recordings stored under data/raw/S1/, one DataFrame per location.
garage, home, drive, classroom, campus = read_data(path='data/raw/S1/')

In [4]:
# (rows, columns) of each recording, in the order garage, home, drive, classroom, campus.
print(garage.shape, home.shape, drive.shape, classroom.shape, campus.shape)

(222, 40) (637, 40) (560, 40) (612, 40) (375, 40)


In [5]:
# Use this to identify where nulls are, if any
#campus[campus['location_latitude'].isnull()]

In [6]:
def plot_movement(data):
    """Show one Plotly figure per motion sensor, with x/y/z traces over time.

    Three figures are displayed, in this order: accelerometer, gyroscope,
    magnetometer. Each figure holds one line per axis, plotted against the
    ``seconds_elapsed`` column and named after its source column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``seconds_elapsed`` plus ``<sensor>_x``, ``<sensor>_y``
        and ``<sensor>_z`` for each of the three sensors.
    """
    for sensor in ('accelerometer', 'gyroscope', 'magnetometer'):
        figure = go.Figure()
        for axis in ('x', 'y', 'z'):
            column = f'{sensor}_{axis}'
            trace = go.Scatter(x=data['seconds_elapsed'], y=data[column], name=column)
            figure.add_trace(trace)
        figure.show()

In [7]:
# Home: accelerometer, gyroscope and magnetometer traces
plot_movement(home)

In [8]:
# Garage: accelerometer, gyroscope and magnetometer traces
plot_movement(garage)

In [9]:
# Drive: accelerometer, gyroscope and magnetometer traces
plot_movement(drive)

In [10]:
# Classroom: accelerometer, gyroscope and magnetometer traces
plot_movement(classroom)

In [11]:
# Campus: accelerometer, gyroscope and magnetometer traces
plot_movement(campus)

In [12]:
def plot_map(data):
    """Display the recorded GPS track as a red polyline on a folium map.

    The map is centred on the mean latitude/longitude of the plotted points
    and rendered with ``display`` (the IPython/Jupyter display hook).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``location_latitude`` and ``location_longitude``.

    Raises
    ------
    ValueError
        If no row has both coordinates present.
    """
    # Selecting the two columns replaces the row-by-row iterrows() loop.
    # Rows with a missing coordinate are dropped because folium rejects
    # NaN locations; the centre is computed from the same cleaned rows.
    track = data[['location_latitude', 'location_longitude']].dropna()
    if track.empty:
        raise ValueError('no rows with both location_latitude and location_longitude')

    coordinates = track.values.tolist()  # [[lat, lon], ...] in recording order
    centre = [track['location_latitude'].mean(), track['location_longitude'].mean()]
    path = folium.Map(location=centre, zoom_start=15)
    folium.PolyLine(coordinates, color='red', weight=3.5).add_to(path)
    display(path)

In [13]:
# Home: GPS track on a map
plot_map(home)

In [14]:
# Garage: GPS track on a map
plot_map(garage)

In [15]:
# Drive: GPS track on a map
plot_map(drive)

In [16]:
# Classroom: GPS track on a map
plot_map(classroom)

In [17]:
# Campus: GPS track on a map
plot_map(campus)

In [18]:
# List the columns available in the campus recording.
campus.columns

Index(['seconds_elapsed', 'accelerometerUncalibrated_z',
       'accelerometerUncalibrated_y', 'accelerometerUncalibrated_x',
       'gyroscope_z', 'gyroscope_y', 'gyroscope_x', 'gyroscopeUncalibrated_z',
       'gyroscopeUncalibrated_y', 'gyroscopeUncalibrated_x',
       'location_bearingAccuracy', 'location_speedAccuracy',
       'location_verticalAccuracy', 'location_horizontalAccuracy',
       'location_speed', 'location_bearing', 'location_altitude',
       'location_longitude', 'location_latitude', 'magnetometer_z',
       'magnetometer_y', 'magnetometer_x', 'microphone_dBFS',
       'accelerometer_z', 'accelerometer_y', 'accelerometer_x',
       'orientation_qz', 'orientation_qy', 'orientation_qx', 'orientation_qw',
       'orientation_roll', 'orientation_pitch', 'orientation_yaw',
       'magnetometerUncalibrated_z', 'magnetometerUncalibrated_y',
       'magnetometerUncalibrated_x', 'totalAcceleration_z',
       'totalAcceleration_y', 'totalAcceleration_x', 'class'],
      dtype='object')

## Extracting features

In [19]:
# drop features
def drop_columns(data):
    """Remove the uncalibrated-sensor and location-accuracy columns in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Recording to clean. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, returned so the call can be assigned.
    """
    unused_columns = [
        'accelerometerUncalibrated_z', 'accelerometerUncalibrated_y', 'accelerometerUncalibrated_x',
        'gyroscopeUncalibrated_z', 'gyroscopeUncalibrated_y', 'gyroscopeUncalibrated_x',
        'magnetometerUncalibrated_z', 'magnetometerUncalibrated_y', 'magnetometerUncalibrated_x',
        'location_bearingAccuracy', 'location_speedAccuracy',
        'location_verticalAccuracy', 'location_horizontalAccuracy',
    ]
    # errors='ignore' makes the call idempotent: re-running the cell on a frame
    # that was already cleaned used to raise KeyError for the missing labels.
    data.drop(columns=unused_columns, inplace=True, errors='ignore')
    return data

In [20]:
# Drop the unused columns from the drive recording, then display the result.
drive = drop_columns(drive)
drive

Unnamed: 0,seconds_elapsed,orientation_qz,orientation_qy,orientation_qx,orientation_qw,orientation_roll,orientation_pitch,orientation_yaw,magnetometer_z,magnetometer_y,...,location_altitude,location_longitude,location_latitude,accelerometer_z,accelerometer_y,accelerometer_x,gyroscope_z,gyroscope_y,gyroscope_x,class
0,0,0.991742,0.097488,-0.021084,0.080613,0.058638,-0.191129,-2.973758,4.932113,-53.116992,...,154.899994,-86.909665,40.425956,0.011313,0.006261,0.005731,0.000497,0.000767,0.000403,Drive
1,1,0.991511,0.099271,-0.020623,0.081299,0.058172,-0.194734,-2.972285,5.205496,-52.111079,...,154.899994,-86.909665,40.425956,-0.051236,0.160595,0.006007,0.000378,0.000433,0.002727,Drive
2,2,0.987105,0.135741,-0.016618,0.081518,0.057046,-0.268563,-2.969105,4.690778,-50.759319,...,154.899994,-86.909665,40.425956,-0.133686,0.658564,0.002201,0.000764,-0.002902,0.002267,Drive
3,3,0.981605,0.172108,-0.010522,0.081791,0.051852,-0.342851,-2.966355,3.986472,-52.037147,...,154.899994,-86.910003,40.425912,0.047631,-0.135057,-0.011834,0.001113,-0.007573,0.001030,Drive
4,4,0.987689,0.130298,-0.013008,0.082227,0.048799,-0.258254,-2.969144,5.166334,-52.706927,...,154.899994,-86.910102,40.425906,0.213277,-1.012750,-0.007606,0.000734,-0.000148,-0.004782,Drive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555,555,-0.751090,-0.090566,0.079586,0.649097,0.002040,-0.241711,1.716473,-78.470399,-11.395776,...,160.199997,-86.915690,40.384712,-0.002386,0.000347,0.000129,-0.000001,-0.000310,-0.000084,Drive
556,556,-0.748644,-0.090679,0.079514,0.651910,0.000851,-0.241794,1.708817,-78.428553,-11.498744,...,160.199997,-86.915690,40.384712,-0.002771,-0.001185,0.000764,-0.000345,-0.000808,0.000271,Drive
557,557,-0.746101,-0.090248,0.080211,0.654793,0.001549,-0.242069,1.701157,-78.573733,-11.248766,...,160.199997,-86.915690,40.384712,-0.019882,-0.002066,-0.000743,0.000283,0.000698,-0.000096,Drive
558,558,-0.743280,-0.089965,0.080725,0.657969,0.001663,-0.242333,1.692613,-79.257298,-10.162234,...,160.199997,-86.915690,40.384712,-0.006157,-0.001027,0.000171,-0.000001,0.000093,0.000176,Drive


In [24]:
from scipy.stats import skew, kurtosis, mode
from scipy.signal import find_peaks
from scipy.fftpack import fft

# Number of rows in each sliding window.
WSIZE = 100
# Step between consecutive window starts: the windowing loops pass this value
# as the range() step, so it is the stride rather than an overlap length.
OVERLAP = 50

#filter_columns = [col for col in drive.columns if ('_x' in col) and ('_y' in col) and ('_z' in col)]
def select_sensors(data):
    """Group the column names of ``data`` by sensor.

    Columns are matched on the ``<sensor>_`` prefix, so variants such as
    ``accelerometerUncalibrated_x`` are not mixed into the ``accelerometer``
    group when the function is called on a frame that still contains them.
    Within a group, names keep the order they have in ``data.columns``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column names are inspected; its values are not read.

    Returns
    -------
    tuple of list of str
        ``(accelerometer, gyroscope, magnetometer, totalAcceleration,
        orientation, microphone)``. The last two are fixed name lists and are
        returned whether or not those columns exist in ``data``.
    """
    def columns_of(sensor):
        # Prefix match instead of a substring test: 'accelerometer' in col is
        # also true for 'accelerometerUncalibrated_x'.
        prefix = f'{sensor}_'
        return [col for col in data.columns if col.startswith(prefix)]

    accelerometer_columns = columns_of('accelerometer')
    gyroscope_columns = columns_of('gyroscope')
    magneto_columns = columns_of('magnetometer')
    totalAccel_columns = columns_of('totalAcceleration')
    orientation_columns = ['orientation_roll', 'orientation_pitch', 'orientation_yaw']
    microphone_column = ['microphone_dBFS']
    return (accelerometer_columns, gyroscope_columns, magneto_columns,
            totalAccel_columns, orientation_columns, microphone_column)

def extract_features(window, data):
    """Compute per-axis summary statistics for one window of sensor samples.

    For each sensor in (accelerometer, gyroscope, magnetometer,
    totalAcceleration) and each axis in (x, y, z), 19 values are appended in
    this order: mean, std, mean absolute deviation, min, max, median, median
    absolute deviation, IQR, count of negative samples, count of positive
    samples, count of samples above the mean, number of peaks, energy (sum of
    squares), avg_accel, SMA (sum of absolute values), index of the largest
    FFT magnitude, largest FFT magnitude, sum of FFT magnitudes, magnitude.

    Parameters
    ----------
    window : pandas.DataFrame
        Rows of one window. Must contain a ``<sensor>_<axis>`` column for every
        sensor/axis pair listed above.
    data : pandas.DataFrame
        Not used; kept so existing ``extract_features(window, data)`` calls
        keep working.

    Returns
    -------
    list
        Flat list of 4 * 3 * 19 = 228 values, ordered sensor -> axis -> statistic.

    Raises
    ------
    ValueError
        If ``window`` has no rows.
    KeyError
        If a required ``<sensor>_<axis>`` column is missing.
    """
    if len(window) == 0:
        raise ValueError('window must contain at least one sample')

    sensors = ('accelerometer', 'gyroscope', 'magnetometer', 'totalAcceleration')
    # The original test was `'accelerometer' in columns`, where `columns` was a
    # list of full column names, so it was never true and `magnitude` was always
    # 0. Checking the sensor name restores the intended branch:
    # totalAcceleration still reports 0.
    magnitude_sensors = ('accelerometer', 'gyroscope', 'magnetometer')
    # Keep the first half of the spectrum, derived from the actual window length
    # rather than the global WSIZE; at least one bin so argmax/max stay defined.
    half_spectrum = max(1, len(window) // 2)

    features = []
    for sensor in sensors:
        for axis in ('x', 'y', 'z'):
            axis_data = window[f'{sensor}_{axis}']

            # Location and spread
            mean = axis_data.mean()
            std = axis_data.std()
            mean_abs_dev = (axis_data - mean).abs().mean()
            minval = axis_data.min()
            maxval = axis_data.max()
            median = axis_data.median()
            median_abs_dev = np.median(np.abs(axis_data - median))
            q1, q3 = np.percentile(axis_data, [25, 75])
            iqr = q3 - q1

            # Sign and threshold counts
            neg_count = np.sum(axis_data < 0)
            pos_count = np.sum(axis_data > 0)
            over_mean_count = np.sum(axis_data > mean)

            peaks, _ = find_peaks(axis_data.values)
            num_peaks = len(peaks)

            energy = np.sum(axis_data ** 2)
            # NOTE(review): this is the square root of the energy. The original
            # wrapped it in np.mean(), a no-op on a scalar. Kept as is so the
            # feature values do not change; confirm the intended definition.
            avg_accel = np.sqrt(energy)
            sma = np.sum(np.abs(axis_data))

            # FFT magnitudes (bin 0, the constant component, is included)
            fft_vals = np.abs(fft(axis_data.values))[:half_spectrum]
            max_fft_freq = np.argmax(fft_vals)
            max_fft_vals = np.max(fft_vals)
            sum_fft_vals = np.sum(fft_vals)

            magnitude = np.sqrt(energy) if sensor in magnitude_sensors else 0

            features.extend([mean, std, mean_abs_dev, minval, maxval, median, median_abs_dev, iqr,
                             neg_count, pos_count, over_mean_count, num_peaks, energy, avg_accel,
                             sma, max_fft_freq, max_fft_vals, sum_fft_vals, magnitude])

    return features

def extract_orientation_features(window):
    """Return the mean roll, pitch and yaw of a window.

    Parameters
    ----------
    window : pandas.DataFrame
        Must contain ``orientation_roll``, ``orientation_pitch`` and
        ``orientation_yaw``.

    Returns
    -------
    tuple
        ``(mean_roll, mean_pitch, mean_yaw)``.
    """
    # NOTE(review): these are plain arithmetic means. If the columns are angles
    # that wrap around, a circular mean may be more appropriate - confirm.
    angle_columns = ('orientation_roll', 'orientation_pitch', 'orientation_yaw')
    return tuple(window[name].mean() for name in angle_columns)

def extract_microphone_loudness(window):
    """Return the mean of the ``microphone_dBFS`` column over a window.

    Parameters
    ----------
    window : pandas.DataFrame
        Must contain a ``microphone_dBFS`` column.
    """
    return window['microphone_dBFS'].mean()

In [26]:
# Build one feature row per sliding window of the drive recording.
(accelerometer_columns, gyroscope_columns, magneto_columns,
 totalAccel_columns, orientation_columns, microphone_column) = select_sensors(drive)

# Every feature family uses the same window starts: WSIZE rows per window,
# advancing OVERLAP rows each time.
motion_columns = accelerometer_columns + gyroscope_columns + magneto_columns + totalAccel_columns
window_starts = range(0, len(drive) - WSIZE + 1, OVERLAP)

# Statistical features for the motion sensors
window_features = [
    extract_features(drive[motion_columns].iloc[start:start + WSIZE], drive)
    for start in window_starts
]

# Column names follow the order extract_features emits: sensor -> axis -> statistic
feature_names = ['mean', 'std', 'mean_abs_dev', 'minval', 'maxval', 'median', 'median_abs_dev', 'iqr',
                 'neg_count', 'pos_count', 'over_mean_count', 'num_peaks', 'energy', 'avg_accel',
                 'sma', 'max_fft_freq', 'max_fft_vals', 'sum_fft_vals', 'magnitude']
columns = [
    f'{sensors}_{axis}_{feature}'
    for sensors in ['accelerometer', 'gyroscope', 'magnetometer', 'totalAcceleration']
    for axis in ['x', 'y', 'z']
    for feature in feature_names
]
window_features_df = pd.DataFrame(window_features, columns=columns)

# Mean orientation per window
orientation_means = [
    list(extract_orientation_features(drive[orientation_columns].iloc[start:start + WSIZE]))
    for start in window_starts
]

# Mean microphone level per window
microphone_loudness = [
    extract_microphone_loudness(drive[microphone_column].iloc[start:start + WSIZE])
    for start in window_starts
]

# Assemble the three feature families side by side (rows align by window index)
orientation_df = pd.DataFrame(orientation_means, columns=['mean_roll', 'mean_pitch', 'mean_yaw'])
loudness_df = pd.DataFrame(microphone_loudness, columns = ['mean_loudness'])
pp_data = pd.concat([window_features_df, orientation_df, loudness_df], axis=1)

In [27]:
# Display the per-window feature table.
pp_data

Unnamed: 0,accelerometer_x_mean,accelerometer_x_std,accelerometer_x_mean_abs_dev,accelerometer_x_minval,accelerometer_x_maxval,accelerometer_x_median,accelerometer_x_median_abs_dev,accelerometer_x_iqr,accelerometer_x_neg_count,accelerometer_x_pos_count,...,totalAcceleration_z_avg_accel,totalAcceleration_z_sma,totalAcceleration_z_max_fft_freq,totalAcceleration_z_max_fft_vals,totalAcceleration_z_sum_fft_vals,totalAcceleration_z_magnitude,mean_roll,mean_pitch,mean_yaw,mean_loudness
0,-0.0065,0.109396,0.061466,-0.515725,0.427338,0.003827,0.029739,0.060405,42,58,...,97.300273,972.884672,0,972.884672,1029.661231,0,0.020086,-0.196958,2.134471,-24.880833
1,0.004452,0.247859,0.112874,-0.68456,1.764284,0.002167,0.041382,0.08648,46,54,...,97.916811,978.898672,0,978.898672,1056.84183,0,0.007206,-0.174078,2.5804,-23.85375
2,0.02662,0.352638,0.196921,-0.695835,1.764284,0.004084,0.067888,0.133311,45,55,...,97.53204,975.033185,0,975.033185,1054.903392,0,0.003548,-0.20011,-0.213223,-19.476111
3,0.017308,0.352146,0.192489,-0.905201,1.641494,0.001017,0.052222,0.098661,42,58,...,97.635269,976.19563,0,976.19563,1035.484855,0,-0.01538,-0.196657,-2.718091,-24.010417
4,-0.017276,0.396275,0.201963,-1.542065,1.253675,0.000396,0.017982,0.036078,45,55,...,97.361911,973.456899,0,973.456899,1027.956005,0,-0.013678,-0.207347,-1.521952,-31.684722
5,-0.039368,0.32997,0.160299,-1.542065,1.253675,-0.000246,0.042653,0.081377,51,49,...,96.847629,968.393727,0,968.393727,1017.392862,0,-0.009751,-0.230814,1.24328,-22.60625
6,0.000265,0.184424,0.087358,-0.695368,0.751128,-0.000316,0.026353,0.049504,51,49,...,97.823458,978.146252,0,978.146252,1026.522342,0,-0.020894,-0.179466,2.779922,-24.549861
7,0.038601,0.312334,0.157287,-0.660399,1.410966,-0.000307,0.003596,0.006686,54,46,...,97.393891,973.827708,0,973.827708,1020.839702,0,-4.9e-05,-0.200544,2.631356,-33.18375
8,0.025639,0.346848,0.1642,-1.226693,1.410966,-0.000842,0.022193,0.04278,58,42,...,97.081741,970.739556,0,970.739556,1013.412148,0,0.003191,-0.222562,0.981798,-26.007917
9,-0.012438,0.416628,0.261668,-1.556794,1.03631,-0.015899,0.123863,0.238745,58,42,...,97.937208,979.27598,0,979.27598,1029.355798,0,-0.039735,-0.190779,-0.740552,-19.490139
