In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures
from tsfresh.examples import load_robot_execution_failures
from tsfresh.feature_extraction.settings import MinimalFCParameters
from tsfresh.transformers import RelevantFeatureAugmenter

In [2]:
# Fetch the robot execution failures example data, then load it.
# The loader hands back a pair: the time series table (`df`) and the
# per-series labels (`y`).
download_robot_execution_failures()
robot_dataset = load_robot_execution_failures()
df, y = robot_dataset
df.shape

(1320, 8)

In [19]:
# Preview the raw time series table: every row carries a series `id`, a
# `time` step and the six sensor columns F_x, F_y, F_z, T_x, T_y, T_z.
df.head()

Unnamed: 0,id,time,F_x,F_y,F_z,T_x,T_y,T_z
0,1,0,-1,-1,63,-3,-1,0
1,1,1,0,0,62,-3,-1,0
2,1,2,-1,-1,61,-3,0,0
3,1,3,-1,-1,63,-2,-1,0
4,1,4,-1,-1,63,-3,-1,0


In [3]:
# `df` contains the time series of both the train and the test samples, while
# `y` holds the labels, indexed by the series id found in df["id"].
# Split the labels first, then select the matching time series rows so that
# df_train / df_test line up with y_train / y_test.
# random_state is pinned so the split (and everything derived from it) is the
# same on every Restart & Run All.
y_train, y_test = train_test_split(y, random_state=42)
df_train = df.loc[df["id"].isin(y_train.index)]
df_test = df.loc[df["id"].isin(y_test.index)]
# The feature matrices start out empty (index only, one row per series id);
# they are handed to the pipeline together with the time series container.
X_train = pd.DataFrame(index=y_train.index)
X_test = pd.DataFrame(index=y_test.index)
df_train.shape, df_test.shape

((990, 8), (330, 8))

In [9]:
# Inspect the training labels: a boolean Series whose index holds the ids of
# the time series assigned to the training side of the split.
y_train

63    False
15     True
2      True
23    False
54    False
13     True
32    False
34    False
52    False
31    False
38    False
29    False
42    False
4      True
56    False
84    False
73    False
5      True
50    False
57    False
85    False
69    False
65    False
26    False
75    False
83    False
25    False
18     True
36    False
81    False
      ...  
79    False
10     True
22    False
35    False
77    False
44    False
30    False
6      True
37    False
72    False
11     True
24    False
17     True
86    False
53    False
33    False
76    False
87    False
40    False
47    False
55    False
74    False
67    False
43     True
48    False
82    False
16     True
21    False
78    False
88    False
Length: 66, dtype: bool

In [8]:
# X_test was built with an index only (the test series ids) and no columns,
# so only the ids show up here.
# NOTE(review): presumably the pipeline's 'fresh' step adds the feature
# columns at fit/predict time - confirm against the tsfresh documentation.
X_test

12
58
20
19
64
66
14
71
8
51
62


In [10]:
# First rows of the training time series (rows of `df` whose id is in y_train.index).
df_train.head()

Unnamed: 0,id,time,F_x,F_y,F_z,T_x,T_y,T_z
15,2,0,-1,-1,63,-2,-1,0
16,2,1,-1,-1,63,-3,-1,0
17,2,2,-1,-1,61,-3,0,0
18,2,3,0,-4,63,1,0,0
19,2,4,0,-1,59,-2,0,-1


In [11]:
# First rows of the test time series (rows of `df` whose id is in y_test.index).
df_test.head()

Unnamed: 0,id,time,F_x,F_y,F_z,T_x,T_y,T_z
0,1,0,-1,-1,63,-3,-1,0
1,1,1,0,0,62,-3,-1,0
2,1,2,-1,-1,61,-3,0,0
3,1,3,-1,-1,63,-2,-1,0
4,1,4,-1,-1,63,-3,-1,0


# Pipeline: feature extraction and classification

In [12]:
# Two-step pipeline:
#   'fresh' - RelevantFeatureAugmenter reading series grouped by the `id`
#             column and ordered by the `time` column, restricted to the
#             MinimalFCParameters feature set.
#   'clf'   - random forest classifier fed with the output of 'fresh'.
# The time series container is deliberately not set here; it is supplied via
# set_params(fresh__timeseries_container=...) before each fit / predict.
# random_state pins the forest's randomness so repeated runs give the same
# model and the same reported metrics.
ppl = Pipeline([
    ('fresh', RelevantFeatureAugmenter(column_id='id', column_sort='time',
                                       default_fc_parameters=MinimalFCParameters())),
    ('clf', RandomForestClassifier(random_state=42)),
])

In [13]:
# Training: point the 'fresh' step at the training time series (`df_train`)
# and fit on the training index/labels. set_params returns the pipeline
# itself, so the fit call is chained; the fitted pipeline is the cell's value.
ppl.set_params(fresh__timeseries_container=df_train).fit(X_train, y_train)

Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 2463.04it/s]
Feature Extraction: 100%|██████████| 20/20 [00:00<00:00, 35590.19it/s]


Pipeline(memory=None,
         steps=[('fresh',
                 RelevantFeatureAugmenter(chunksize=None, column_id='id',
                                          column_kind=None, column_sort='time',
                                          column_value=None,
                                          default_fc_parameters={'length': None,
                                                                 'maximum': None,
                                                                 'mean': None,
                                                                 'median': None,
                                                                 'minimum': None,
                                                                 'standard_deviation': None,
                                                                 'sum_values': None,
                                                                 'variance': None},
                                          disable_progressbar=False,
 

In [14]:
# Prediction: switch the 'fresh' step over to the test time series (`df_test`)
# before predicting, so the test ids in X_test are looked up in the right
# container.
y_pred = ppl.set_params(fresh__timeseries_container=df_test).predict(X_test)

Feature Extraction: 100%|██████████| 19/19 [00:00<00:00, 40167.23it/s]


In [15]:
# Per-class precision / recall / F1 of the predictions against the held-out labels.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

       False       1.00      1.00      1.00        16
        True       1.00      1.00      1.00         6

    accuracy                           1.00        22
   macro avg       1.00      1.00      1.00        22
weighted avg       1.00      1.00      1.00        22

