# "Help Navigate Robots"
> "Help robots recognize the floor surface they’re standing on using data collected from Inertial Measurement Units"
- toc: false
- branch: master
- badges: true
- comments: true
- categories: [fastpages, jupyter]
- image: images/some_folder/your_image.png
- hide: false
- search_exclude: true
- metadata_key1: metadata_value1
- metadata_key2: metadata_value2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix

In [12]:
df = pd.read_csv("X_train.csv")
df2 = pd.read_csv("y_train.csv")

In [13]:
df.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.75853,-0.63435,-0.10488,-0.10597,0.10765,0.017561,0.000767,-0.74857,2.103,-9.7532
1,0_1,0,1,-0.75853,-0.63434,-0.1049,-0.106,0.067851,0.029939,0.003385,0.33995,1.5064,-9.4128
2,0_2,0,2,-0.75853,-0.63435,-0.10492,-0.10597,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,0_3,0,3,-0.75852,-0.63436,-0.10495,-0.10597,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.096
4,0_4,0,4,-0.75852,-0.63435,-0.10495,-0.10596,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.441


In [14]:
df2.head()

Unnamed: 0,series_id,group_id,surface
0,0,13,fine_concrete
1,1,31,concrete
2,2,20,concrete
3,3,31,concrete
4,4,22,soft_tiles


In [15]:
df2.shape

(3810, 3)

## Feature Engineering

In [16]:
df['orientation_T'] = (df['orientation_X']**2 + df['orientation_Y']**2 +
                               df['orientation_Z']**2)**0.5
df['linear_acceleration_T'] = (df['linear_acceleration_X']**2 + df['linear_acceleration_Y']**2 +
                               df['linear_acceleration_Z']**2)**0.5
df['angular_velocity_T'] = (df['angular_velocity_X']**2 + df['angular_velocity_Y']**2 +
                               df['angular_velocity_Z']**2)**0.5

In [17]:
df_agg = df.groupby(['series_id']).agg({'orientation_X': ['mean', min, max,sum],
                                       'orientation_Y': ['mean', min ,max,sum],
                                       'orientation_Z': ['mean', min ,max,sum],
                                        'orientation_W': ['mean', min ,max,sum],
                                        'orientation_T': ['mean', min ,max,sum],
                                       'linear_acceleration_X': ['mean', min ,max,sum],
                                       'linear_acceleration_Y': ['mean', min ,max,sum],
                                       'linear_acceleration_Z': ['mean', min ,max,sum],
                                        'linear_acceleration_T': ['mean', min ,max,sum],
                                       'angular_velocity_X': ['mean', min ,max,sum],
                                       'angular_velocity_Y': ['mean', min ,max,sum],
                                       'angular_velocity_Z': ['mean', min ,max,sum],
                                       'angular_velocity_T': ['mean', min ,max,sum]})

In [18]:
list(df_agg)

[('orientation_X', 'mean'),
 ('orientation_X', 'min'),
 ('orientation_X', 'max'),
 ('orientation_X', 'sum'),
 ('orientation_Y', 'mean'),
 ('orientation_Y', 'min'),
 ('orientation_Y', 'max'),
 ('orientation_Y', 'sum'),
 ('orientation_Z', 'mean'),
 ('orientation_Z', 'min'),
 ('orientation_Z', 'max'),
 ('orientation_Z', 'sum'),
 ('orientation_W', 'mean'),
 ('orientation_W', 'min'),
 ('orientation_W', 'max'),
 ('orientation_W', 'sum'),
 ('orientation_T', 'mean'),
 ('orientation_T', 'min'),
 ('orientation_T', 'max'),
 ('orientation_T', 'sum'),
 ('linear_acceleration_X', 'mean'),
 ('linear_acceleration_X', 'min'),
 ('linear_acceleration_X', 'max'),
 ('linear_acceleration_X', 'sum'),
 ('linear_acceleration_Y', 'mean'),
 ('linear_acceleration_Y', 'min'),
 ('linear_acceleration_Y', 'max'),
 ('linear_acceleration_Y', 'sum'),
 ('linear_acceleration_Z', 'mean'),
 ('linear_acceleration_Z', 'min'),
 ('linear_acceleration_Z', 'max'),
 ('linear_acceleration_Z', 'sum'),
 ('linear_acceleration_T', 'mean

In [19]:
df_agg[('orientation_X','range')] = df_agg[('orientation_X', 'max')] - df_agg[('orientation_X', 'min')]
df_agg[('orientation_Y','range')] = df_agg[('orientation_Y', 'max')] - df_agg[('orientation_Y', 'min')]
df_agg[('orientation_Z','range')] = df_agg[('orientation_Z', 'max')] - df_agg[('orientation_Z', 'min')]
df_agg[('orientation_W','range')] = df_agg[('orientation_W', 'max')] - df_agg[('orientation_W', 'min')]
df_agg[('orientation_T','range')] = df_agg[('orientation_T', 'max')] - df_agg[('orientation_T', 'min')]
df_agg[('linear_acceleration_X','range')] = df_agg[('linear_acceleration_X', 'max')] - df_agg[('linear_acceleration_X', 'min')]
df_agg[('linear_acceleration_Y','range')] = df_agg[('linear_acceleration_Y', 'max')] - df_agg[('linear_acceleration_Y', 'min')]
df_agg[('linear_acceleration_Z','range')] = df_agg[('linear_acceleration_Z', 'max')] - df_agg[('linear_acceleration_Z', 'min')]
df_agg[('linear_acceleration_T','range')] = df_agg[('linear_acceleration_T', 'max')] - df_agg[('linear_acceleration_T', 'min')]
df_agg[('angular_velocity_X','range')] = df_agg[('angular_velocity_X', 'max')] - df_agg[('angular_velocity_X', 'min')]
df_agg[('angular_velocity_Y','range')] = df_agg[('angular_velocity_Y', 'max')] - df_agg[('angular_velocity_Y', 'min')]
df_agg[('angular_velocity_Z','range')] = df_agg[('angular_velocity_Z', 'max')] - df_agg[('angular_velocity_Z', 'min')]
df_agg[('angular_velocity_T','range')] = df_agg[('angular_velocity_T', 'max')] - df_agg[('angular_velocity_T', 'min')]

In [20]:
df_agg.head()

Unnamed: 0_level_0,orientation_X,orientation_X,orientation_X,orientation_X,orientation_Y,orientation_Y,orientation_Y,orientation_Y,orientation_Z,orientation_Z,...,orientation_W,orientation_T,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z,linear_acceleration_T,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,angular_velocity_T
Unnamed: 0_level_1,mean,min,max,sum,mean,min,max,sum,mean,min,...,range,range,range,range,range,range,range,range,range,range
series_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,-0.758666,-0.75953,-0.75822,-97.10922,-0.634008,-0.63456,-0.63306,-81.15298,-0.105474,-0.10614,...,0.00146,0.00016,4.7182,5.310983,6.2439,5.458424,0.26806,0.152102,0.081901,0.151781
1,-0.958606,-0.95896,-0.95837,-122.70162,0.241867,0.24074,0.2427,30.95897,0.03165,0.030504,...,0.00222,0.000331,8.2936,8.8342,14.1831,11.118339,0.53822,0.24641,0.25076,0.267779
2,-0.512057,-0.51434,-0.50944,-65.54334,-0.846171,-0.84779,-0.8449,-108.30988,-0.129371,-0.1303,...,0.001157,7.8e-05,4.4463,7.4645,6.7548,5.931175,0.29463,0.199756,0.104427,0.161048
3,-0.939169,-0.93968,-0.93884,-120.21364,0.31014,0.30943,0.31147,39.69794,0.038955,0.037922,...,0.00503,0.000717,7.9966,17.5681,19.2859,18.769065,0.92065,0.30393,0.158759,0.509122
4,-0.891301,-0.89689,-0.88673,-114.08647,0.428144,0.41646,0.4374,54.80246,0.060056,0.058247,...,0.00194,0.000272,2.09851,4.47603,3.526,3.413774,0.184974,0.075533,0.150568,0.140159


In [21]:
df_agg.shape

(3810, 65)

In [22]:
le = LabelEncoder()

In [23]:
le.fit(df2['surface'])

LabelEncoder()

In [24]:
df2['surface']= le.transform(df2['surface'])

In [25]:
df2.head()

Unnamed: 0,series_id,group_id,surface
0,0,13,2
1,1,31,1
2,2,20,1
3,3,31,1
4,4,22,6


## Modelling

In [26]:
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(df_agg, df2['surface'], test_size=0.3) # 70% training and 30% test

In [31]:
clf = RandomForestClassifier(n_estimators=100,n_jobs=-1,min_samples_leaf = 3)
clf.fit(X_train,y_train)
clf_predictions = clf.predict(X_test) 
clf.score(X_test,y_test)

0.8442694663167104

In [32]:
clf.score(X_train,y_train)

0.9835020622422197

In [33]:
feature_importances = pd.DataFrame(clf.feature_importances_,
                                   index = X_train.columns,
                                    columns=['importance']).sort_values('importance',ascending=False)
feature_importances

Unnamed: 0,Unnamed: 1,importance
orientation_X,min,0.037489
orientation_X,max,0.036277
orientation_X,mean,0.035278
orientation_W,mean,0.028306
orientation_T,mean,0.027860
linear_acceleration_X,range,0.027695
orientation_X,sum,0.027230
orientation_W,sum,0.026694
orientation_W,min,0.026412
orientation_Z,mean,0.025123


In [34]:
cm = confusion_matrix(y_test, clf_predictions)

In [35]:
print(cm)

[[ 39   4   0   0   0   2   3   1   3]
 [  4 194   3   0   6   2   3   7   7]
 [  0  11  88   0   1   5   3   3   6]
 [  0   0   0   4   0   0   0   0   1]
 [  4   4   0   0  78   1   0   0   6]
 [  2   7   1   0   1 211   4   2   5]
 [  1   4   2   0   0   6  74   0   4]
 [  1   9   1   0   1   4   2 130   7]
 [  0   5   3   0   0   8   2   6 147]]


In [54]:
le.classes_

array(['carpet', 'concrete', 'fine_concrete', 'hard_tiles',
       'hard_tiles_large_space', 'soft_pvc', 'soft_tiles', 'tiled',
       'wood'], dtype=object)

## Score the test dataset and create the submission file

In [70]:
sub = pd.read_csv('X_test.csv')

In [71]:
sub.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.025773,-0.98864,-0.14801,0.00335,-0.006524,-0.001071,-0.02739,0.10043,4.2061,-5.5439
1,0_1,0,1,-0.025683,-0.98862,-0.14816,0.003439,-0.11396,0.083987,-0.06059,-0.70889,3.9905,-8.0273
2,0_2,0,2,-0.025617,-0.98861,-0.14826,0.003571,-0.080518,0.11486,-0.037177,1.4571,2.2828,-11.299
3,0_3,0,3,-0.025566,-0.98862,-0.14817,0.003609,0.070067,0.03382,-0.035904,0.71096,1.8582,-12.227
4,0_4,0,4,-0.025548,-0.98866,-0.14792,0.003477,0.15205,-0.029016,-0.015314,3.3996,2.7881,-10.41


In [87]:
df_agg_test = sub.groupby(['series_id']).agg({'orientation_X': ['mean', min ,max],
                                       'orientation_Y': ['mean', min ,max],
                                       'orientation_Z': ['mean', min ,max],
                                        'orientation_W': ['mean', min ,max],
                                       'linear_acceleration_X': ['mean', min ,max],
                                       'linear_acceleration_Y': ['mean', min ,max],
                                       'linear_acceleration_Z': ['mean', min ,max],
                                       'angular_velocity_X': ['mean', min ,max],
                                       'angular_velocity_Y': ['mean', min ,max],
                                       'angular_velocity_Z': ['mean', min ,max]})

In [88]:
df_agg_test.head()

Unnamed: 0_level_0,orientation_X,orientation_X,orientation_X,orientation_Y,orientation_Y,orientation_Y,orientation_Z,orientation_Z,orientation_Z,orientation_W,...,linear_acceleration_Z,angular_velocity_X,angular_velocity_X,angular_velocity_X,angular_velocity_Y,angular_velocity_Y,angular_velocity_Y,angular_velocity_Z,angular_velocity_Z,angular_velocity_Z
Unnamed: 0_level_1,mean,min,max,mean,min,max,mean,min,max,mean,...,max,mean,min,max,mean,min,max,mean,min,max
series_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,-0.02581,-0.026418,-0.025156,-0.988644,-0.98873,-0.98854,-0.148006,-0.14872,-0.14748,0.003147,...,-3.996,0.000994,-0.22561,0.23127,0.002629,-0.13797,0.11486,-0.002971,-0.10259,0.097635
1,-0.932288,-0.93372,-0.93148,0.330271,0.32661,0.33227,0.043416,0.042283,0.044053,-0.140968,...,-3.6473,0.010602,-0.13198,0.28485,0.014625,-0.04189,0.084099,-0.0369,-0.10556,0.037424
2,-0.230186,-0.23141,-0.22713,0.961448,0.96109,0.96217,0.14434,0.14356,0.14511,-0.042394,...,-2.1986,-0.003484,-0.30282,0.25024,0.009079,-0.16116,0.15459,-0.026607,-0.28306,0.13802
3,0.164661,0.16332,0.1675,0.975293,0.97485,0.97551,0.146153,0.14582,0.14649,0.018096,...,-7.8266,0.004221,-0.046763,0.066896,-0.006815,-0.027883,0.015618,0.026272,-0.030321,0.081661
4,-0.2536,-0.26938,-0.23637,0.955712,0.9515,0.96018,0.142326,0.14112,0.14306,-0.044067,...,-8.9277,0.005279,-0.029431,0.052902,0.069537,0.054452,0.09166,-0.211458,-0.2401,-0.1645


In [89]:
df_agg_test['prediction']=clf.predict(df_agg_test)

In [90]:
df_agg_test.head()

Unnamed: 0_level_0,orientation_X,orientation_X,orientation_X,orientation_Y,orientation_Y,orientation_Y,orientation_Z,orientation_Z,orientation_Z,orientation_W,...,angular_velocity_X,angular_velocity_X,angular_velocity_X,angular_velocity_Y,angular_velocity_Y,angular_velocity_Y,angular_velocity_Z,angular_velocity_Z,angular_velocity_Z,prediction
Unnamed: 0_level_1,mean,min,max,mean,min,max,mean,min,max,mean,...,mean,min,max,mean,min,max,mean,min,max,Unnamed: 21_level_1
series_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,-0.02581,-0.026418,-0.025156,-0.988644,-0.98873,-0.98854,-0.148006,-0.14872,-0.14748,0.003147,...,0.000994,-0.22561,0.23127,0.002629,-0.13797,0.11486,-0.002971,-0.10259,0.097635,4
1,-0.932288,-0.93372,-0.93148,0.330271,0.32661,0.33227,0.043416,0.042283,0.044053,-0.140968,...,0.010602,-0.13198,0.28485,0.014625,-0.04189,0.084099,-0.0369,-0.10556,0.037424,1
2,-0.230186,-0.23141,-0.22713,0.961448,0.96109,0.96217,0.14434,0.14356,0.14511,-0.042394,...,-0.003484,-0.30282,0.25024,0.009079,-0.16116,0.15459,-0.026607,-0.28306,0.13802,7
3,0.164661,0.16332,0.1675,0.975293,0.97485,0.97551,0.146153,0.14582,0.14649,0.018096,...,0.004221,-0.046763,0.066896,-0.006815,-0.027883,0.015618,0.026272,-0.030321,0.081661,6
4,-0.2536,-0.26938,-0.23637,0.955712,0.9515,0.96018,0.142326,0.14112,0.14306,-0.044067,...,0.005279,-0.029431,0.052902,0.069537,0.054452,0.09166,-0.211458,-0.2401,-0.1645,6


In [91]:
df_agg_test['surface']= le.inverse_transform(df_agg_test['prediction'])

  if diff:


In [99]:
df_agg_test.to_csv('submission.csv')