In [4]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
!pip install pyarrow



In [0]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import gc
import math
import re

from numba import jit
from tqdm import tqdm
from collections import Counter

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score

import lightgbm as lgb

import warnings
warnings.simplefilter(action='ignore')

# change to path
PATH='/content/drive/My Drive/Colab Notebooks/grab/'
os.chdir(PATH)

In [0]:
PATH_LABELS = PATH + 'safety/labels'
PATH_FEATURES = PATH + 'safety/features'

## Read in
Here, I work on the dataset.

In [0]:
labels_df = pd.read_csv(f'{PATH_LABELS}/part-00000-e9445087-aa0a-433b-a7f6-7f4c19d78ad6-c000.csv')

In [3]:
train_df = pd.read_feather(f'{PATH_FEATURES}/train')

# train_df = pd.read_pickle(f'{PATH_FEATURES}/train_df.pkl')

train_df.head()

Unnamed: 0,bookingID,Accuracy,Bearing,acceleration_x,acceleration_y,acceleration_z,gyro_x,gyro_y,gyro_z,second,Speed,unique_counts
0,0,16.0,142.124496,-0.6608,-9.543246,-1.81492,-0.002753,-0.035185,0.014585,183.0,0.227891,1004
1,0,8.0,315.071838,-0.674707,-9.46474,-1.566092,-0.001937,-0.019093,0.009868,942.0,2.682516,1004
2,0,16.0,144.125198,-1.183429,-8.49126,-3.712085,0.022289,-0.002134,0.052846,123.0,11.216825,1004
3,0,16.0,243.958252,-0.600388,-11.128629,-0.485394,-0.034949,-0.021853,-0.01237,241.0,3.728397,1004
4,0,8.0,38.000866,-0.88959,-8.826669,-1.920493,0.041287,-0.01765,0.033933,1037.0,12.354941,1004


In [4]:
len(train_df)

16116704

## EDA

In [5]:
ids = train_df['bookingID'].unique().tolist()

len(ids)

19982

In [0]:
# # explore an ID
# example = train_df[(train_df['bookingID'] == 0) |
#                    (train_df['bookingID'] == 1709396983975)]

# ids = example['bookingID'].unique()

# print(ids)

# # highest accuracy
# np.sort(example['Accuracy'].values)[::-1]

# example[example['Accuracy'] > 20]

In [0]:
# example.describe()

## Feature engineering
Some ideas are taken from this [discussion](https://www.kaggle.com/c/axa-driver-telematics-analysis/discussion/12848#latest-66913). 

In [0]:
def total_a(df):
    # acceleration combining all directions
    total_a = np.power(df['acceleration_x'].values, 2) + \
                np.power(df['acceleration_y'].values, 2) + \
                np.power(df['acceleration_z'].values, 2)
    
    df['total_a'] = np.sqrt(total_a)
    
def total_gyro(df):
    # gyro combining all directions
    total_gyro = np.power(df['gyro_x'].values, 2) + \
                 np.power(df['gyro_y'].values, 2) + \
                 np.power(df['gyro_z'].values, 2)
    
    df['total_gyro'] = np.sqrt(total_gyro)
    
def roll(df):
    # roll  
    yz = df[['acceleration_y', 'acceleration_z']].values
    y, z = yz[:, 0], yz[:, 1]
       
    df['roll'] = np.arctan2(y, z)
      
def pitch(df):
    # pitch
    xyz = df[['acceleration_x', 'acceleration_y', 'acceleration_z']].values
    x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2]
    
    df['pitch'] = np.arctan2(-x, np.sqrt(np.power(y, 2) + np.power(z, 2)))
    
def horsepower(df):
    df['horsepower'] = df['total_a'] * df['Speed']
    
def aggressive_turn(df):
    pass
  
def jerk(df):
    pass
  
def a_threshold(df):
    # and difference > threshold
    pass  
  
def fe(df):
    total_a(df)
    total_gyro(df)
    roll(df)
    pitch(df)
    horsepower(df)

In [9]:
%%time
fe(train_df)

CPU times: user 5.46 s, sys: 5.69 s, total: 11.2 s
Wall time: 11.1 s


In [48]:
exclude = ['label', 'bookingID', 'unique_counts']

features = [c for c in train_df.columns if c not in exclude]

len(features)

features

['Accuracy',
 'Bearing',
 'acceleration_x',
 'acceleration_y',
 'acceleration_z',
 'gyro_x',
 'gyro_y',
 'gyro_z',
 'second',
 'Speed',
 'total_a',
 'total_gyro',
 'roll',
 'pitch',
 'horsepower']

## Accuracy
Extract the `TOP_N` highest accuracy features.

In [0]:
@jit(nopython=True)
def accuracy_features(accuracy, feature, TOP_N=1):
    idxs = np.argsort(accuracy)[::-1][:TOP_N]
    return feature[idxs]

In [0]:
# %%time
# TOP_N_SPEED_2 = np.empty((len(ids), TOP_N))
# # TOP_N_HP = np.empty((len(ids), TOP_N))

# for i in tqdm(range(len(ids))):
#     accuracy = train_df[train_df['bookingID'] == ids[i]]['Accuracy'].values
#     features = train_df[train_df['bookingID'] == ids[i]]['Speed'].values

#     TOP_N_SPEED_2[i] = accuracy_features(accuracy, features, 3)    

In [0]:
def top_accuracy_f(bookingID, df, f, N=1):
  
    subset = df[df['bookingID'] == bookingID]   
    
    # get indices with top N highest accuracy
    idxs = np.argsort(subset['Accuracy'].values)[::-1][:N]    
    
    return subset.iloc[idxs][f].values.T

In [72]:
gc.collect()

891

In [0]:
# %%time
# TOP_N = 3

# TOP_N_SPEED = np.empty((len(ids), TOP_N))
# TOP_N_HP = np.empty((len(ids), TOP_N))

# for i in range(len(ids)):
#     TOP_N_SPEED[i] = top_accuracy_f(ids[i], train_df, 'Speed', TOP_N)
#     TOP_N_HP[i] = top_accuracy_f(ids[i], train_df, 'horsepower', TOP_N)

## Normalization

In [0]:
# from sklearn.preprocessing import StandardScaler, MinMaxScaler

# from tqdm import tqdm

# def normalize_by_ids(df):
#     drivers = df['bookingID'].unique()
#     for driver in tqdm(drivers):
#         ids = df[df['bookingID'] == driver].index
#         scaler = MinMaxScaler()
#         df.loc[ids, features] = scaler.fit_transform(df.loc[ids, features])
        
# normalize_by_ids(sub_df)

In [0]:
#=======
# differentiation features
#=======
def fo(x):
    return np.diff(x)

def so(x):
    return np.diff(fo(x))

def fo_mean(x):
    # first order diff mean
    return np.mean(fo(x))

def so_mean(x):
    # second order diff mean
    return np.mean(so(x))         

def fo_min(x):
    # first order diff min
    return np.min(fo(x))

def so_min(x):
    # second order diff min
    return np.min(so(x))   

def fo_max(x):
    # first order diff max
    return np.max(fo(x))

def so_max(x):
    # second order diff max
    return np.max(so(x))  

def sign_change(x):
    return sum(np.diff(np.sign(x)) != 0)

In [0]:
def agg_features(df, new_df):
  
    # statistical features
    func = ('min', 'max', 'mean', 'std')
    
    exclude = ('label', 'bookingID', 'unique_counts')

    # performed on all features
    features = (c for c in df.columns if c not in exclude)   
    
    for f in features:
        new = df.groupby('bookingID')[f].agg(func)
        
        new.columns = [f + "_" + c for c in new.columns]
        
        new.reset_index(drop=False, inplace=True)
        
        new_df = pd.merge(new_df, new,
                          left_on='bookingID', right_on='bookingID',
                          how='inner')
    
    print('Done statistical features...')
    print('Now to differentiation features...')
    
    diff_func = (fo_mean, fo_min, fo_max,
                 so_mean, so_min, so_max,
                 sign_change)
    
    subfeatures = ('acceleration_x', 'acceleration_y', 'acceleration_z',
                   'gyro_x', 'gyro_y', 'gyro_z')
    
    for f in subfeatures:
        new = df.groupby('bookingID')[f].agg(diff_func)
        
        new.columns = [f + "_" + c for c in new.columns]
        
        new.reset_index(drop=False, inplace=True)
        
        new_df = pd.merge(new_df, new,
                          left_on='bookingID', right_on='bookingID',
                          how='inner')
        
                
    return new_df        

In [86]:
new_df = pd.DataFrame()

new_df['bookingID'] = ids

len(new_df)

19982

In [87]:
%%time
sample_new = agg_features(train_df, new_df)

Done statistical features...
Now to differentiation features...
CPU times: user 5min 26s, sys: 1.43 s, total: 5min 27s
Wall time: 5min 27s


In [89]:
sample_new.head()

Unnamed: 0,bookingID,Accuracy_min,Accuracy_max,Accuracy_mean,Accuracy_std,Bearing_min,Bearing_max,Bearing_mean,Bearing_std,acceleration_x_min,acceleration_x_max,acceleration_x_mean,acceleration_x_std,acceleration_y_min,acceleration_y_max,acceleration_y_mean,acceleration_y_std,acceleration_z_min,acceleration_z_max,acceleration_z_mean,acceleration_z_std,gyro_x_min,gyro_x_max,gyro_x_mean,gyro_x_std,gyro_y_min,gyro_y_max,gyro_y_mean,gyro_y_std,gyro_z_min,gyro_z_max,gyro_z_mean,gyro_z_std,second_min,second_max,second_mean,second_std,Speed_min,Speed_max,Speed_mean,...,acceleration_x_fo_max,acceleration_x_so_mean,acceleration_x_so_min,acceleration_x_so_max,acceleration_x_sign_change,acceleration_y_fo_mean,acceleration_y_fo_min,acceleration_y_fo_max,acceleration_y_so_mean,acceleration_y_so_min,acceleration_y_so_max,acceleration_y_sign_change,acceleration_z_fo_mean,acceleration_z_fo_min,acceleration_z_fo_max,acceleration_z_so_mean,acceleration_z_so_min,acceleration_z_so_max,acceleration_z_sign_change,gyro_x_fo_mean,gyro_x_fo_min,gyro_x_fo_max,gyro_x_so_mean,gyro_x_so_min,gyro_x_so_max,gyro_x_sign_change,gyro_y_fo_mean,gyro_y_fo_min,gyro_y_fo_max,gyro_y_so_mean,gyro_y_so_min,gyro_y_so_max,gyro_y_sign_change,gyro_z_fo_mean,gyro_z_fo_min,gyro_z_fo_max,gyro_z_so_mean,gyro_z_so_min,gyro_z_so_max,gyro_z_sign_change
0,0,4.0,48.0,10.165339,3.855898,0.037464,359.979767,176.526099,129.231351,-4.692294,4.782614,-0.711264,0.928022,-12.764703,-6.119916,-9.613822,0.639934,-6.251807,2.318857,-1.619658,1.141266,-0.392537,0.438371,0.003328,0.065954,-0.60993,0.469724,-0.006118,0.100225,-0.731892,0.372807,-0.004188,0.063685,0.0,1589.0,903.526892,533.745097,-1.0,22.946083,8.994822,...,4.873383,-0.00196,-10.184308,8.453427,278.0,0.000324,-3.953137,3.903342,0.000934,-7.856479,5.524164,0.0,0.00166,-4.960413,6.332556,0.002131,-9.415692,10.856174,144.0,6.2e-05,-0.470277,0.446838,-0.000139,-0.878883,0.792117,503.0,0.000133,-0.6092,0.632817,-3.7e-05,-0.982114,1.163994,519.0,-5.6e-05,-0.731597,0.860446,-5e-05,-1.010095,1.592042,489.0
1,1,3.0,7.709,3.718763,0.597933,0.0,337.0,124.19859,89.861236,-5.352994,3.813341,-0.525406,0.744157,6.623425,12.536156,9.532086,0.533915,-5.355455,1.481293,-2.198999,0.854271,-0.125367,0.126536,-0.002467,0.02774,-0.678183,0.470837,-0.00754,0.091699,-0.121265,0.235131,0.000405,0.033838,0.0,1034.0,581.175088,289.339367,-1.0,21.882141,7.881588,...,5.112138,0.001334,-8.5249,8.847361,180.0,-0.000785,-3.323172,3.170564,0.000634,-6.294559,6.425989,0.0,0.001054,-5.032708,5.158748,0.00254,-9.591842,7.498774,24.0,-5e-05,-0.179333,0.121126,-5.3e-05,-0.246702,0.291941,428.0,8e-06,-0.717767,0.596711,-0.000281,-0.782274,1.275626,433.0,-6.9e-05,-0.216438,0.275204,-4.8e-05,-0.484521,0.28311,415.0
2,2,3.0,8.0,3.930626,1.117354,1.0,354.0,173.794872,119.31652,-2.971295,1.956122,0.306786,0.756589,7.94181,13.333716,9.843183,0.505693,-3.282551,2.31287,0.139347,1.020021,-0.155218,0.282162,0.006458,0.053903,-0.462,0.431695,-0.012861,0.117321,-0.125907,0.255884,0.002597,0.036215,0.0,825.0,339.441026,356.319445,0.0,9.360483,3.157213,...,3.464516,0.01805,-5.013611,7.108602,80.0,0.001592,-3.490852,3.031151,0.003449,-6.522003,4.005621,0.0,0.004134,-5.375147,4.326454,0.006488,-7.043957,8.842057,88.0,-0.000356,-0.277944,0.298713,-0.000529,-0.576657,0.397062,98.0,0.001603,-0.638354,0.507018,-0.000608,-1.034806,0.967,101.0,-0.000482,-0.276111,0.258396,-0.000215,-0.534507,0.324369,113.0
3,4,10.0,10.0,10.0,0.0,2.271227,353.855377,151.807013,71.273774,-2.866458,2.019635,-0.365117,0.52722,-18.847833,-7.064984,-9.406439,0.598023,-9.374869,0.296381,-2.613639,0.779529,-0.420507,0.438732,-0.022884,0.042342,-0.506531,0.652628,0.023232,0.112567,-0.348189,0.50522,-0.000376,0.065927,0.0,1094.0,547.49543,315.962793,0.0,19.780001,6.150996,...,3.158801,0.001724,-5.133127,5.25455,299.0,0.000358,-9.794467,9.601715,-9.3e-05,-10.051071,19.396182,0.0,-0.001748,-8.202805,6.575403,0.001314,-10.440613,14.778207,4.0,8e-06,-0.50061,0.507631,-5.7e-05,-1.008241,0.800959,264.0,-0.000306,-0.57736,0.625146,0.00011,-1.186005,1.002399,519.0,-1.2e-05,-0.481751,0.561944,0.000108,-1.043695,0.714401,536.0
4,6,3.0,12.0,4.586721,1.329545,0.0,359.0,197.812785,111.868249,-4.352792,5.593026,0.490616,0.826271,6.46933,13.249917,9.538043,0.61721,-2.140482,7.977724,2.355059,0.942163,-0.253551,0.362202,0.003877,0.05517,-0.621802,0.532123,0.000436,0.106815,-0.348345,0.364534,0.00293,0.057438,0.0,1094.0,547.0,316.243577,0.0,16.394695,4.628921,...,6.431022,3.1e-05,-11.686455,9.854836,355.0,0.000635,-4.108576,3.265792,0.000416,-7.225922,6.960157,0.0,-0.00039,-7.489292,5.090228,-0.000526,-12.57952,10.458192,26.0,5.5e-05,-0.351248,0.410501,-1.2e-05,-0.725708,0.507018,537.0,1.2e-05,-0.654848,0.671341,2e-06,-1.158811,1.304197,520.0,-1.5e-05,-0.444099,0.354913,-7e-06,-0.728762,0.787405,547.0


In [59]:
len(new)

19982

In [90]:
vc = train_df.groupby('bookingID').head(1)['unique_counts'].values

sample_new['vc'] = vc

sample_new.head()

Unnamed: 0,bookingID,Accuracy_min,Accuracy_max,Accuracy_mean,Accuracy_std,Bearing_min,Bearing_max,Bearing_mean,Bearing_std,acceleration_x_min,acceleration_x_max,acceleration_x_mean,acceleration_x_std,acceleration_y_min,acceleration_y_max,acceleration_y_mean,acceleration_y_std,acceleration_z_min,acceleration_z_max,acceleration_z_mean,acceleration_z_std,gyro_x_min,gyro_x_max,gyro_x_mean,gyro_x_std,gyro_y_min,gyro_y_max,gyro_y_mean,gyro_y_std,gyro_z_min,gyro_z_max,gyro_z_mean,gyro_z_std,second_min,second_max,second_mean,second_std,Speed_min,Speed_max,Speed_mean,...,acceleration_x_so_mean,acceleration_x_so_min,acceleration_x_so_max,acceleration_x_sign_change,acceleration_y_fo_mean,acceleration_y_fo_min,acceleration_y_fo_max,acceleration_y_so_mean,acceleration_y_so_min,acceleration_y_so_max,acceleration_y_sign_change,acceleration_z_fo_mean,acceleration_z_fo_min,acceleration_z_fo_max,acceleration_z_so_mean,acceleration_z_so_min,acceleration_z_so_max,acceleration_z_sign_change,gyro_x_fo_mean,gyro_x_fo_min,gyro_x_fo_max,gyro_x_so_mean,gyro_x_so_min,gyro_x_so_max,gyro_x_sign_change,gyro_y_fo_mean,gyro_y_fo_min,gyro_y_fo_max,gyro_y_so_mean,gyro_y_so_min,gyro_y_so_max,gyro_y_sign_change,gyro_z_fo_mean,gyro_z_fo_min,gyro_z_fo_max,gyro_z_so_mean,gyro_z_so_min,gyro_z_so_max,gyro_z_sign_change,vc
0,0,4.0,48.0,10.165339,3.855898,0.037464,359.979767,176.526099,129.231351,-4.692294,4.782614,-0.711264,0.928022,-12.764703,-6.119916,-9.613822,0.639934,-6.251807,2.318857,-1.619658,1.141266,-0.392537,0.438371,0.003328,0.065954,-0.60993,0.469724,-0.006118,0.100225,-0.731892,0.372807,-0.004188,0.063685,0.0,1589.0,903.526892,533.745097,-1.0,22.946083,8.994822,...,-0.00196,-10.184308,8.453427,278.0,0.000324,-3.953137,3.903342,0.000934,-7.856479,5.524164,0.0,0.00166,-4.960413,6.332556,0.002131,-9.415692,10.856174,144.0,6.2e-05,-0.470277,0.446838,-0.000139,-0.878883,0.792117,503.0,0.000133,-0.6092,0.632817,-3.7e-05,-0.982114,1.163994,519.0,-5.6e-05,-0.731597,0.860446,-5e-05,-1.010095,1.592042,489.0,1004
1,1,3.0,7.709,3.718763,0.597933,0.0,337.0,124.19859,89.861236,-5.352994,3.813341,-0.525406,0.744157,6.623425,12.536156,9.532086,0.533915,-5.355455,1.481293,-2.198999,0.854271,-0.125367,0.126536,-0.002467,0.02774,-0.678183,0.470837,-0.00754,0.091699,-0.121265,0.235131,0.000405,0.033838,0.0,1034.0,581.175088,289.339367,-1.0,21.882141,7.881588,...,0.001334,-8.5249,8.847361,180.0,-0.000785,-3.323172,3.170564,0.000634,-6.294559,6.425989,0.0,0.001054,-5.032708,5.158748,0.00254,-9.591842,7.498774,24.0,-5e-05,-0.179333,0.121126,-5.3e-05,-0.246702,0.291941,428.0,8e-06,-0.717767,0.596711,-0.000281,-0.782274,1.275626,433.0,-6.9e-05,-0.216438,0.275204,-4.8e-05,-0.484521,0.28311,415.0,851
2,2,3.0,8.0,3.930626,1.117354,1.0,354.0,173.794872,119.31652,-2.971295,1.956122,0.306786,0.756589,7.94181,13.333716,9.843183,0.505693,-3.282551,2.31287,0.139347,1.020021,-0.155218,0.282162,0.006458,0.053903,-0.462,0.431695,-0.012861,0.117321,-0.125907,0.255884,0.002597,0.036215,0.0,825.0,339.441026,356.319445,0.0,9.360483,3.157213,...,0.01805,-5.013611,7.108602,80.0,0.001592,-3.490852,3.031151,0.003449,-6.522003,4.005621,0.0,0.004134,-5.375147,4.326454,0.006488,-7.043957,8.842057,88.0,-0.000356,-0.277944,0.298713,-0.000529,-0.576657,0.397062,98.0,0.001603,-0.638354,0.507018,-0.000608,-1.034806,0.967,101.0,-0.000482,-0.276111,0.258396,-0.000215,-0.534507,0.324369,113.0,195
3,4,10.0,10.0,10.0,0.0,2.271227,353.855377,151.807013,71.273774,-2.866458,2.019635,-0.365117,0.52722,-18.847833,-7.064984,-9.406439,0.598023,-9.374869,0.296381,-2.613639,0.779529,-0.420507,0.438732,-0.022884,0.042342,-0.506531,0.652628,0.023232,0.112567,-0.348189,0.50522,-0.000376,0.065927,0.0,1094.0,547.49543,315.962793,0.0,19.780001,6.150996,...,0.001724,-5.133127,5.25455,299.0,0.000358,-9.794467,9.601715,-9.3e-05,-10.051071,19.396182,0.0,-0.001748,-8.202805,6.575403,0.001314,-10.440613,14.778207,4.0,8e-06,-0.50061,0.507631,-5.7e-05,-1.008241,0.800959,264.0,-0.000306,-0.57736,0.625146,0.00011,-1.186005,1.002399,519.0,-1.2e-05,-0.481751,0.561944,0.000108,-1.043695,0.714401,536.0,1094
4,6,3.0,12.0,4.586721,1.329545,0.0,359.0,197.812785,111.868249,-4.352792,5.593026,0.490616,0.826271,6.46933,13.249917,9.538043,0.61721,-2.140482,7.977724,2.355059,0.942163,-0.253551,0.362202,0.003877,0.05517,-0.621802,0.532123,0.000436,0.106815,-0.348345,0.364534,0.00293,0.057438,0.0,1094.0,547.0,316.243577,0.0,16.394695,4.628921,...,3.1e-05,-11.686455,9.854836,355.0,0.000635,-4.108576,3.265792,0.000416,-7.225922,6.960157,0.0,-0.00039,-7.489292,5.090228,-0.000526,-12.57952,10.458192,26.0,5.5e-05,-0.351248,0.410501,-1.2e-05,-0.725708,0.507018,537.0,1.2e-05,-0.654848,0.671341,2e-06,-1.158811,1.304197,520.0,-1.5e-05,-0.444099,0.354913,-7e-06,-0.728762,0.787405,547.0,1095


In [0]:
def add_features(df, arr, f):
    for i in range(TOP_N):
        var = f + '_' + str(i+1)
        df[var] = arr[:, i]

# add the TOP_N accuracy features to the merged df
add_features(new, TOP_N_SPEED, 'Speed')
add_features(new, TOP_N_HP, 'horsepower')

In [0]:
# #     var = 'acceleration_x_' + str(i)
# #     new[var] = TOP_3_A_X[:,i]
#     var = 'Speed_' + str(i)
#     new[var] = TOP_N_SPEED[:,i]
    
#     var = 'horsepower_' + str(i)
#     new[var] = TOP_N_HP[:,i]

In [62]:
new.head()

Unnamed: 0,bookingID,Accuracy_mean,Accuracy_min,Accuracy_max,Accuracy_std,Accuracy_fo_mean,Accuracy_so_mean,Accuracy_fo_min,Accuracy_so_min,Accuracy_fo_max,Accuracy_so_max,Bearing_mean,Bearing_min,Bearing_max,Bearing_std,Bearing_fo_mean,Bearing_so_mean,Bearing_fo_min,Bearing_so_min,Bearing_fo_max,Bearing_so_max,acceleration_x_mean,acceleration_x_min,acceleration_x_max,acceleration_x_std,acceleration_x_fo_mean,acceleration_x_so_mean,acceleration_x_fo_min,acceleration_x_so_min,acceleration_x_fo_max,acceleration_x_so_max,acceleration_y_mean,acceleration_y_min,acceleration_y_max,acceleration_y_std,acceleration_y_fo_mean,acceleration_y_so_mean,acceleration_y_fo_min,acceleration_y_so_min,acceleration_y_fo_max,...,total_gyro_so_min,total_gyro_fo_max,total_gyro_so_max,roll_mean,roll_min,roll_max,roll_std,roll_fo_mean,roll_so_mean,roll_fo_min,roll_so_min,roll_fo_max,roll_so_max,pitch_mean,pitch_min,pitch_max,pitch_std,pitch_fo_mean,pitch_so_mean,pitch_fo_min,pitch_so_min,pitch_fo_max,pitch_so_max,horsepower_mean,horsepower_min,horsepower_max,horsepower_std,horsepower_fo_mean,horsepower_so_mean,horsepower_fo_min,horsepower_so_min,horsepower_fo_max,horsepower_so_max,vc,Speed_1,Speed_1_2,Speed_1_2_3,horsepower_1,horsepower_1_2,horsepower_1_2_3
0,0,10.165339,4.0,48.0,3.855898,-0.003988036,0.003992,-36.0,-68.0,32.0,32.0,176.526099,0.037464,359.979767,129.231351,-0.101603,-0.280284,-356.680084,-678.707878,357.773807,697.542567,-0.711264,-4.692294,4.782614,0.928022,-0.001451,-0.00196,-5.310925,-10.184308,4.873383,8.453427,-9.613822,-12.764703,-6.119916,0.639934,0.000324,0.000934,-3.953137,-7.856479,3.903342,...,-1.174189,0.660225,0.732735,-1.737242,-2.229817,-1.347602,0.116742,0.000171,0.000202,-0.470042,-0.940743,0.615583,1.047816,0.073598,-0.425732,0.447192,0.094192,0.000157,0.00021,-0.441226,-0.853901,0.505558,0.9185,89.169004,-10.169692,252.090958,71.968693,0.08359973,-0.04773332,-232.745306,-460.166618,231.245321,400.555202,1004,-1.0,10.590596,0.162426,-10.169692,131.70201,1.613122
1,1,3.718763,3.0,7.709,0.597933,0.001058824,-0.001296,-4.414,-8.828,4.709,6.709,124.19859,0.0,337.0,89.861236,0.038824,-0.047114,-306.0,-559.0,304.0,610.0,-0.525406,-5.352994,3.813341,0.744157,-0.000824,0.001334,-4.131128,-8.5249,5.112138,8.847361,9.532086,6.623425,12.536156,0.533915,-0.000785,0.000634,-3.323172,-6.294559,3.170564,...,-1.23569,0.641245,0.657876,1.797626,1.434715,2.13734,0.088049,-9.6e-05,-0.000285,-0.537245,-0.783427,0.523479,0.965273,0.053107,-0.421987,0.512452,0.074387,9.9e-05,-0.000123,-0.488136,-0.832619,0.452367,0.931832,77.562771,-10.135306,233.266532,69.48984,0.1848545,-0.1158458,-225.490708,-453.96089,233.169493,411.608531,851,-1.0,-1.0,-1.0,-9.197917,-9.093537,-9.465303
2,2,3.930626,3.0,8.0,1.117354,0.004639175,0.001793,-4.0,-8.345,5.0,6.0,173.794872,1.0,354.0,119.31652,1.010309,1.388601,-337.0,-676.0,339.0,590.0,0.306786,-2.971295,1.956122,0.756589,0.003345,0.01805,-3.644086,-5.013611,3.464516,7.108602,9.843183,7.94181,13.333716,0.505693,0.001592,0.003449,-3.490852,-6.522003,3.031151,...,-0.806141,0.407373,0.56315,1.556422,1.32823,1.891112,0.10286,-0.000391,-0.000621,-0.436176,-0.777169,0.47842,0.635099,-0.031534,-0.20288,0.260122,0.075406,-0.000304,-0.001784,-0.310418,-0.639641,0.329223,0.490599,31.450526,0.0,104.777363,29.000906,0.4596758,0.4409695,-93.238772,-186.477544,93.238772,173.380469,195,9.002391,9.257438,8.185837,92.66759,104.777363,83.435178
3,4,10.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,151.807013,2.271227,353.855377,71.273774,0.024552,0.006779,-300.273964,-508.312253,331.624398,594.888779,-0.365117,-2.866458,2.019635,0.52722,-0.001801,0.001724,-3.085678,-5.133127,3.158801,5.25455,-9.406439,-18.847833,-7.064984,0.598023,0.000358,-9.3e-05,-9.794467,-10.051071,9.601715,...,-1.165823,0.625971,0.967315,-1.841256,-2.107056,-1.537207,0.077592,-0.000184,0.000134,-0.366293,-0.728194,0.371167,0.595339,0.037733,-0.188548,0.329725,0.05438,0.000184,-0.000176,-0.325465,-0.635692,0.368603,0.512254,60.619916,0.0,385.485278,56.243974,-0.01538936,-0.01304496,-290.200024,-557.572728,267.372704,338.218808,1094,0.19,5.6,2.33,1.842166,60.358278,22.639851
4,6,4.586721,3.0,12.0,1.329545,-1.623727e-18,0.001917,-8.0,-15.72,9.0,11.225,197.812785,0.0,359.0,111.868249,-0.256856,-0.169259,-328.0,-640.0,337.0,602.0,0.490616,-4.352792,5.593026,0.826271,-0.000269,3.1e-05,-5.255433,-11.686455,6.431022,9.854836,9.538043,6.46933,13.249917,0.61721,0.000635,0.000416,-4.108576,-7.225922,3.265792,...,-1.19679,0.613616,0.840368,1.329015,0.903251,1.861424,0.097058,5.5e-05,6.3e-05,-0.498595,-1.169826,0.698923,1.0044,-0.050947,-0.577149,0.455444,0.084303,3e-05,-2e-06,-0.657506,-1.022886,0.54394,1.201446,46.151636,0.0,212.361351,53.504948,1.2989810000000001e-17,-6.630865e-16,-212.361351,-424.722703,212.361351,379.798831,1095,4.268571,6.143076,5.507971,41.553927,61.59175,52.814251


In [93]:
new_sub = pd.merge(sample_new, labels_df,
                  left_on='bookingID', right_on='bookingID',
                  how='inner')

new_sub.head()

Unnamed: 0,bookingID,Accuracy_min,Accuracy_max,Accuracy_mean,Accuracy_std,Bearing_min,Bearing_max,Bearing_mean,Bearing_std,acceleration_x_min,acceleration_x_max,acceleration_x_mean,acceleration_x_std,acceleration_y_min,acceleration_y_max,acceleration_y_mean,acceleration_y_std,acceleration_z_min,acceleration_z_max,acceleration_z_mean,acceleration_z_std,gyro_x_min,gyro_x_max,gyro_x_mean,gyro_x_std,gyro_y_min,gyro_y_max,gyro_y_mean,gyro_y_std,gyro_z_min,gyro_z_max,gyro_z_mean,gyro_z_std,second_min,second_max,second_mean,second_std,Speed_min,Speed_max,Speed_mean,...,acceleration_x_so_min,acceleration_x_so_max,acceleration_x_sign_change,acceleration_y_fo_mean,acceleration_y_fo_min,acceleration_y_fo_max,acceleration_y_so_mean,acceleration_y_so_min,acceleration_y_so_max,acceleration_y_sign_change,acceleration_z_fo_mean,acceleration_z_fo_min,acceleration_z_fo_max,acceleration_z_so_mean,acceleration_z_so_min,acceleration_z_so_max,acceleration_z_sign_change,gyro_x_fo_mean,gyro_x_fo_min,gyro_x_fo_max,gyro_x_so_mean,gyro_x_so_min,gyro_x_so_max,gyro_x_sign_change,gyro_y_fo_mean,gyro_y_fo_min,gyro_y_fo_max,gyro_y_so_mean,gyro_y_so_min,gyro_y_so_max,gyro_y_sign_change,gyro_z_fo_mean,gyro_z_fo_min,gyro_z_fo_max,gyro_z_so_mean,gyro_z_so_min,gyro_z_so_max,gyro_z_sign_change,vc,label
0,0,4.0,48.0,10.165339,3.855898,0.037464,359.979767,176.526099,129.231351,-4.692294,4.782614,-0.711264,0.928022,-12.764703,-6.119916,-9.613822,0.639934,-6.251807,2.318857,-1.619658,1.141266,-0.392537,0.438371,0.003328,0.065954,-0.60993,0.469724,-0.006118,0.100225,-0.731892,0.372807,-0.004188,0.063685,0.0,1589.0,903.526892,533.745097,-1.0,22.946083,8.994822,...,-10.184308,8.453427,278.0,0.000324,-3.953137,3.903342,0.000934,-7.856479,5.524164,0.0,0.00166,-4.960413,6.332556,0.002131,-9.415692,10.856174,144.0,6.2e-05,-0.470277,0.446838,-0.000139,-0.878883,0.792117,503.0,0.000133,-0.6092,0.632817,-3.7e-05,-0.982114,1.163994,519.0,-5.6e-05,-0.731597,0.860446,-5e-05,-1.010095,1.592042,489.0,1004,0
1,1,3.0,7.709,3.718763,0.597933,0.0,337.0,124.19859,89.861236,-5.352994,3.813341,-0.525406,0.744157,6.623425,12.536156,9.532086,0.533915,-5.355455,1.481293,-2.198999,0.854271,-0.125367,0.126536,-0.002467,0.02774,-0.678183,0.470837,-0.00754,0.091699,-0.121265,0.235131,0.000405,0.033838,0.0,1034.0,581.175088,289.339367,-1.0,21.882141,7.881588,...,-8.5249,8.847361,180.0,-0.000785,-3.323172,3.170564,0.000634,-6.294559,6.425989,0.0,0.001054,-5.032708,5.158748,0.00254,-9.591842,7.498774,24.0,-5e-05,-0.179333,0.121126,-5.3e-05,-0.246702,0.291941,428.0,8e-06,-0.717767,0.596711,-0.000281,-0.782274,1.275626,433.0,-6.9e-05,-0.216438,0.275204,-4.8e-05,-0.484521,0.28311,415.0,851,1
2,2,3.0,8.0,3.930626,1.117354,1.0,354.0,173.794872,119.31652,-2.971295,1.956122,0.306786,0.756589,7.94181,13.333716,9.843183,0.505693,-3.282551,2.31287,0.139347,1.020021,-0.155218,0.282162,0.006458,0.053903,-0.462,0.431695,-0.012861,0.117321,-0.125907,0.255884,0.002597,0.036215,0.0,825.0,339.441026,356.319445,0.0,9.360483,3.157213,...,-5.013611,7.108602,80.0,0.001592,-3.490852,3.031151,0.003449,-6.522003,4.005621,0.0,0.004134,-5.375147,4.326454,0.006488,-7.043957,8.842057,88.0,-0.000356,-0.277944,0.298713,-0.000529,-0.576657,0.397062,98.0,0.001603,-0.638354,0.507018,-0.000608,-1.034806,0.967,101.0,-0.000482,-0.276111,0.258396,-0.000215,-0.534507,0.324369,113.0,195,1
3,4,10.0,10.0,10.0,0.0,2.271227,353.855377,151.807013,71.273774,-2.866458,2.019635,-0.365117,0.52722,-18.847833,-7.064984,-9.406439,0.598023,-9.374869,0.296381,-2.613639,0.779529,-0.420507,0.438732,-0.022884,0.042342,-0.506531,0.652628,0.023232,0.112567,-0.348189,0.50522,-0.000376,0.065927,0.0,1094.0,547.49543,315.962793,0.0,19.780001,6.150996,...,-5.133127,5.25455,299.0,0.000358,-9.794467,9.601715,-9.3e-05,-10.051071,19.396182,0.0,-0.001748,-8.202805,6.575403,0.001314,-10.440613,14.778207,4.0,8e-06,-0.50061,0.507631,-5.7e-05,-1.008241,0.800959,264.0,-0.000306,-0.57736,0.625146,0.00011,-1.186005,1.002399,519.0,-1.2e-05,-0.481751,0.561944,0.000108,-1.043695,0.714401,536.0,1094,1
4,6,3.0,12.0,4.586721,1.329545,0.0,359.0,197.812785,111.868249,-4.352792,5.593026,0.490616,0.826271,6.46933,13.249917,9.538043,0.61721,-2.140482,7.977724,2.355059,0.942163,-0.253551,0.362202,0.003877,0.05517,-0.621802,0.532123,0.000436,0.106815,-0.348345,0.364534,0.00293,0.057438,0.0,1094.0,547.0,316.243577,0.0,16.394695,4.628921,...,-11.686455,9.854836,355.0,0.000635,-4.108576,3.265792,0.000416,-7.225922,6.960157,0.0,-0.00039,-7.489292,5.090228,-0.000526,-12.57952,10.458192,26.0,5.5e-05,-0.351248,0.410501,-1.2e-05,-0.725708,0.507018,537.0,1.2e-05,-0.654848,0.671341,2e-06,-1.158811,1.304197,520.0,-1.5e-05,-0.444099,0.354913,-7e-06,-0.728762,0.787405,547.0,1095,0


In [0]:
new_sub.to_pickle(f'{PATH_FEATURES}/sample_new_103.pkl')