# Robot Navigation - Multivariate Time Series Classification


## II. Feature Engineering and Feature Extraction

### Pre-processing

In [3]:
from sklearn import preprocessing
import pandas as pd

In [4]:
# load the sensor measurements (dat_X) and the label table (dat_y) from the working directory
dat_X, dat_y = (pd.read_csv(csv_path) for csv_path in ("X_train.csv", "y_train.csv"))

In [5]:
# Max-norm scaling applied per column (axis=0) to everything from the fourth
# column onward; the first three columns are skipped.
# NOTE(review): presumably those three are identifier columns - confirm against the CSV header.
sensor_columns = dat_X.columns[3:]
X = pd.DataFrame(
    preprocessing.normalize(dat_X.iloc[:, 3:], norm='max', axis=0),
    columns=sensor_columns,
)

# Re-attach the two key columns, unscaled, so rows can still be grouped per series.
key_columns = ['series_id', 'measurement_number']
X[key_columns] = dat_X[key_columns]

### Feature Engineering

#### `Orientation` Features

- Means
- Correlations
- Quaternion measures converted to Euler angle measures

In [6]:
# per-series mean of every column in X, grouped by series_id
# (displayed for inspection only - the result is not assigned to anything)
X.groupby(by="series_id").mean()

Unnamed: 0_level_0,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z,measurement_number
series_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,-0.767026,-0.640638,-0.647756,-0.679796,-0.001046,-0.003064,0.005429,0.007159,0.024563,-0.123636,63.5
1,-0.969170,0.244396,0.194376,-0.937783,0.001942,-0.007188,0.004474,0.003312,0.022785,-0.124544,63.5
2,-0.517700,-0.855020,-0.794513,-0.453849,0.001116,-0.008555,0.020175,0.004069,0.023761,-0.124636,63.5
3,-0.949519,0.313384,0.239237,-0.908689,0.000263,-0.002599,0.005685,0.005484,0.021878,-0.125370,63.5
4,-0.901123,0.432622,0.368829,-0.871283,0.002939,0.042729,-0.102635,-0.002342,0.024541,-0.124028,63.5
...,...,...,...,...,...,...,...,...,...,...,...
3805,-0.231308,0.971788,0.888596,-0.270467,0.001062,0.007857,-0.015351,0.007248,0.022596,-0.125558,63.5
3806,0.548238,0.835235,0.797703,0.487390,-0.000191,0.003159,-0.000300,0.003709,0.023873,-0.124257,63.5
3807,-0.421101,0.904738,0.860928,-0.447044,0.013428,0.096170,-0.243839,-0.001662,0.030132,-0.121939,63.5
3808,-0.267499,0.962698,0.872447,-0.291822,0.000476,-0.027166,0.063859,0.005951,0.025315,-0.123968,63.5


In [7]:
# Pairwise correlations between the four 'orientation' components, computed
# separately for every series. `correlations` maps each pair label to a list
# holding one coefficient per series, ordered by ascending series_id.
orientation_pairs = {
    'cor_XY': ('orientation_X', 'orientation_Y'),
    'cor_XZ': ('orientation_X', 'orientation_Z'),
    'cor_XW': ('orientation_X', 'orientation_W'),
    'cor_YZ': ('orientation_Z', 'orientation_Y'),
    'cor_YW': ('orientation_W', 'orientation_Y'),
    'cor_ZW': ('orientation_Z', 'orientation_W'),
}
correlations = {pair_name: [] for pair_name in orientation_pairs}

# One groupby pass replaces re-filtering the whole frame once per series, and
# the series ids now come from the data instead of a hard-coded count.
# groupby sorts its keys by default, so when the ids are 0..N-1 the order of
# the lists is the same as iterating range(N).
for series_id, series_rows in X.groupby('series_id'):
    for pair_name, (left, right) in orientation_pairs.items():
        correlations[pair_name].append(series_rows[left].corr(series_rows[right]))

In [8]:
# tabulate the collected coefficients: one column per key of `correlations`
pd.DataFrame(data=correlations)

Unnamed: 0,cor_XY,cor_XZ,cor_XW,cor_YZ,cor_YW,cor_ZW
0,-0.968895,-0.209884,0.569175,-0.027350,-0.738308,0.491010
1,0.823808,0.839093,-0.422316,0.700512,0.161287,-0.277040
2,-0.995043,-0.724096,0.339594,0.652254,-0.428607,0.367623
3,0.340573,0.826403,-0.774881,-0.192437,0.329802,-0.952539
4,0.999772,0.982865,0.779318,0.979234,0.791021,0.682156
...,...,...,...,...,...,...
3805,0.965801,0.173629,0.477317,-0.080076,0.522423,0.054732
3806,-0.999985,-0.992173,0.998785,0.991824,-0.998834,-0.991860
3807,0.999305,0.860483,0.876115,0.843755,0.889302,0.541529
3808,0.984334,0.317170,0.824703,0.145601,0.891840,-0.143852
