# Imports 

In [27]:
import sys

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Must run before the `tsad` imports below; presumably this lets the in-repo
# `tsad` package resolve from the parent directory.
sys.path.insert(1, '../')

from tsad.base.pipeline import Pipeline
from tsad.base.datasets import load_skab
from tsad.base.wrappers import SklearnWrapper

from tsad.tasks.eda import HighLevelDatasetAnalysisTask, TimeDiscretizationTask
from tsad.tasks.eda import FindNaNTask, EquipmentDowntimeTask
from tsad.tasks.preprocess import ScalingTask, ValueRangeProcessingTask, ResampleProcessingTask
from tsad.tasks.preprocess import FeatureProcessingTask, SplitByNaNTask, PrepareSeqSamplesTask
from tsad.tasks.deep_learning_anomaly_detection import ResidualAnomalyDetectionTask
from tsad.tasks.deep_learning_forecasting import DeepLeaningTimeSeriesForecastingTask




# Data loading

In [41]:
# Load the dataset bundle once, then pull out the feature names, target names
# and the full DataFrame in a single assignment.
dataset = load_skab()
columns, targets, df = (
    dataset.feature_names,
    dataset.target_names,
    dataset.frame,
)

In [42]:
# Show the labels of the outermost index level; the recorded output below
# names this level `experiment`.
df.index.levels[0]

Index(['air/0', 'air/1', 'fluid/0', 'fluid/1', 'fluid/2', 'fluid/3', 'fluid/4',
       'fluid/5', 'rotor/0', 'rotor/1', 'rotor/2', 'rotor/3', 'rotor/4',
       'temperature/0', 'valve1/0', 'valve1/1', 'valve1/10', 'valve1/11',
       'valve1/12', 'valve1/13', 'valve1/14', 'valve1/15', 'valve1/2',
       'valve1/3', 'valve1/4', 'valve1/5', 'valve1/6', 'valve1/7', 'valve1/8',
       'valve1/9', 'valve2/0', 'valve2/1', 'valve2/2', 'valve2/3'],
      dtype='object', name='experiment')

# Train Test Split

In [43]:
# Experiments held out for testing, identified by their first-level index label.
test_datasets = ['valve1/0', 'valve2/0', 'fluid/0']

# Everything except the held-out experiments, with the first index level removed.
train_raw = (
    df
    .drop(index=test_datasets, level=0)
    .droplevel(0)
)

# Only the held-out experiments, with the first index level removed.
test_raw = (
    df
    .loc[test_datasets]
    .droplevel(0)
)

# Making a custom task

In [44]:
# Wrap scikit-learn's StandardScaler with SklearnWrapper; the result is
# instantiated as `StandardScalerTask()` in the pipeline's task list.
# `StandardScaler` is imported in the notebook's top import cell.
StandardScalerTask = SklearnWrapper(StandardScaler)

# Building the pipeline

In [60]:
%%time
pipeline = Pipeline([
    HighLevelDatasetAnalysisTask(),
    TimeDiscretizationTask(freq_tobe_approach='custom',FREQ_TOBE='1s'),# freq_tobe='1s'),
    FindNaNTask(),
    EquipmentDowntimeTask(),
    ResampleProcessingTask(),
    StandardScalerTask(),
    FeatureProcessingTask(),
    SplitByNaNTask(),
    PrepareSeqSamplesTask(len_seq=10),
#     DeepLeaningTimeSeriesForecastingTask(),
], show=False)
train = pipeline.fit(train_raw,n_epochs=7)


None
1s
Пропущено 102 датастов, из-за того что saples слишком малов в датасете. (len_seq + points_ahead + gap -1 <= len(df))
CPU times: total: 4.28 s
Wall time: 4.29 s


In [61]:
# Disabled tweak: would set `test_size` to 0 in the kwargs of the
# second-to-last pipeline task.
# pipeline.tasks[-2].kwargs['test_size'] = 0

In [62]:
# Apply the pipeline to the held-out experiments.
# NOTE(review): the recorded output of this cell is
# `UnsupportedTaskResultException: <class 'NoneType'>`; presumably one of the
# tasks returns None from its predict step. TODO: find out which one.
pipeline.predict(test_raw)

UnsupportedTaskResultException: <class 'NoneType'>

In [18]:
# Rebuild the pipeline from all of its tasks except the last one.
# NOTE(review): this rebinds `pipeline` to a new Pipeline object, so every
# re-run of the cell drops one more task, and `show=False` is not passed here.
# The error recorded for the following predict call ("Can't find required task
# result ...") suggests the new object does not carry over fit results.
# TODO confirm, and re-fit or use a separate variable name.
pipeline = Pipeline(pipeline.tasks[:-1])

In [20]:
# Apply the current `pipeline` object to the held-out experiments.
# NOTE(review): the recorded output of this cell is "Can't find required task
# result of HighLevelDatasetAnalysisResult in Pipeline."; presumably `pipeline`
# had been re-created and not fit again before this call. TODO confirm.
pipeline.predict(test_raw)

Exception: Can't find required task result of HighLevelDatasetAnalysisResult in Pipeline.

In [None]:
# Build sequence samples separately for the train and test data, then collect
# them into the list passed to the model tasks below.
# The unpacking assumes `fit` returns a pair whose first element is a 4-tuple;
# positions 0 and 2 are used for train, positions 1 and 3 for test.
# NOTE(review): the meaning of each position is not visible here. TODO confirm
# against PrepareSeqSamplesTask.
task = PrepareSeqSamplesTask(len_seq=10)
(X_train, _, y_train, _) , _ = task.fit(train)

# NOTE(review): `test` is not assigned anywhere in the visible notebook cells
# (only `test_raw` is), so this line raises NameError on a fresh kernel.
task = PrepareSeqSamplesTask(len_seq=10)
(_, X_test, _, y_test), _ = task.fit(test)

dfs = [X_train,X_test,y_train,y_test]

In [None]:
# Create the residual-based anomaly detection task with default arguments.
task = ResidualAnomalyDetectionTask()

In [None]:
# Fit the task on the prepared sample list, passing the first stored pipeline
# result as `result_base_eda`.
# NOTE(review): `fit_pipeline` is not defined in the visible notebook cells;
# presumably the fitted `pipeline` object was intended. TODO confirm.
task.fit(dfs,result_base_eda=fit_pipeline.results[0])

In [None]:
# Create the deep-learning forecasting task with default arguments and fit it
# on the prepared sample list.
# NOTE(review): `fit_pipeline` is not defined in the visible notebook cells;
# presumably the fitted `pipeline` object was intended. TODO confirm.
task = DeepLeaningTimeSeriesForecastingTask()
task.fit(dfs,result_base_eda=fit_pipeline.results[0])