In [1]:
# Make the in-repo `tsad` package importable without installing it.
# The entry is a relative path, so it is resolved against the current
# working directory when imports happen.

import sys

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if '../' not in sys.path:
    sys.path.append('../')

In [2]:
import pandas as pd

from tsad.base.task import Task, TaskResult
from tsad.base.pipeline import Pipeline
from tsad.tasks.eda import HighLevelDatasetAnalysisTask

### Load external dataset - SKAB

In [3]:
from tsad.base.datasets import load_skab_teaser

# Load the SKAB teaser dataset through tsad's loader.
dataset = load_skab_teaser()
# Keep only the first entry of `dataset.frame`; the cells below pass it to
# tasks as a DataFrame.
# NOTE(review): presumably `dataset.frame` is a list of DataFrames - confirm against tsad.
frame = dataset.frame[0]

### Sklearn usage example

External methods, algorithms, and classes must be wrapped in a `Task` to be used in TSAD pipelines.

Create preprocessing task with external scikit-learn StandardScaler

In [4]:
class SklearnPreprocessingTaskResult(TaskResult):
    """Result object carrying the scaler fitted by ``SklearnPreprocessingTask``."""

    # Fitted scaler; the task assigns it after constructing the result.
    scaler: None

    def show(self) -> None:
        """Display nothing; this result has no visual representation."""
        return None


class SklearnPreprocessingTask(Task):
    """Wrap scikit-learn's ``StandardScaler`` as a TSAD task."""

    def fit_predict(self, df: pd.DataFrame) -> tuple[pd.DataFrame, TaskResult]:
        """Fit a ``StandardScaler`` on ``df``.

        Args:
            df: Data the scaler is fitted on.

        Returns:
            ``(df, result)`` where ``df`` is the input frame passed through
            unchanged and ``result.scaler`` is the fitted scaler.
        """
        from sklearn.preprocessing import StandardScaler

        result = SklearnPreprocessingTaskResult()
        # StandardScaler.fit returns the fitted estimator itself.
        result.scaler = StandardScaler().fit(df)

        # The frame is returned unscaled; scaling is only applied by `predict`.
        return df, result

    def predict(self, df: pd.DataFrame, result: SklearnPreprocessingTaskResult) -> pd.DataFrame:
        """Scale ``df`` with the scaler stored in ``result``.

        Args:
            df: Data to scale.
            result: Result produced by ``fit_predict``; its ``scaler`` is used.

        Returns:
            A new DataFrame holding the scaled values. It is built from the
            raw array returned by the scaler, so it carries a default integer
            index and integer column labels rather than those of ``df``.
        """
        scaled_values = result.scaler.transform(df)
        return pd.DataFrame(scaled_values)

In [5]:
# Use the preprocessing task directly, outside of a pipeline: fit the scaler first.
preprocessing_task = SklearnPreprocessingTask()
preprocessing_df, preprocessing_result = preprocessing_task.fit_predict(frame)

# Scale the frame with the fitted scaler held in `preprocessing_result`.
preprocessed_df = preprocessing_task.predict(frame, preprocessing_result)

Create anomaly detection task with external scikit-learn IsolationForest

In [6]:
class SklearnAnomalyTaskResult(TaskResult):
    """Result object carrying the model fitted by ``SklearnAnomalyTask``."""

    # Fitted isolation forest; the task assigns it after constructing the result.
    isolation_forest: None

    def show(self) -> None:
        """Display nothing; this result has no visual representation."""
        return None


class SklearnAnomalyTask(Task):
    """Wrap scikit-learn's ``IsolationForest`` as a TSAD task."""

    def fit_predict(self, df: pd.DataFrame, preprocessing: SklearnPreprocessingTaskResult) -> tuple[pd.DataFrame, TaskResult]:
        """Fit an ``IsolationForest`` on the scaled version of ``df``.

        Args:
            df: Unscaled data; it is scaled here with ``preprocessing.scaler``
                before fitting.
            preprocessing: Result holding the fitted scaler.

        Returns:
            ``(df, result)`` where ``df`` is the input frame passed through
            unchanged and ``result.isolation_forest`` is the fitted model.
        """
        from sklearn.ensemble import IsolationForest

        scaled_values = preprocessing.scaler.transform(df)

        # A fixed random_state keeps the fitted forest reproducible across runs.
        forest = IsolationForest(max_samples=100, random_state=0)
        forest.fit(scaled_values)

        result = SklearnAnomalyTaskResult()
        result.isolation_forest = forest

        return df, result

    def predict(self, df: pd.DataFrame, anomaly: SklearnAnomalyTaskResult, preprocessing: SklearnPreprocessingTaskResult) -> pd.DataFrame:
        """Label the rows of ``df`` with the fitted isolation forest.

        Unlike ``fit_predict``, this method does NOT scale ``df``:
        ``preprocessing`` is accepted but not used, so the caller must pass
        data that has already been scaled.

        Args:
            df: Already-scaled data to label.
            anomaly: Result produced by ``fit_predict``; its
                ``isolation_forest`` is used.
            preprocessing: Unused here.

        Returns:
            A single-column DataFrame with the forest's predictions
            (per scikit-learn: ``1`` for inliers, ``-1`` for outliers),
            on a default integer index.
        """
        labels = anomaly.isolation_forest.predict(df)
        return pd.DataFrame(labels)

In [7]:
# Fit the isolation forest on the raw frame; fit_predict scales it internally
# with the scaler from `preprocessing_result`.
anomaly_task = SklearnAnomalyTask()
anomaly_df, anomaly_result = anomaly_task.fit_predict(frame, preprocessing_result)

# predict() does not scale its input, so the already-scaled frame is passed here.
anomaly_task.predict(preprocessed_df, anomaly_result, preprocessing_result).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6405 entries, 0 to 6404
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   0       6405 non-null   int64
dtypes: int64(1)
memory usage: 50.2 KB


Create an anomaly detection pipeline that uses the external scikit-learn library

In [8]:
# Chain dataset analysis, scaling and anomaly detection into one pipeline.
# NOTE(review): presumably the pipeline supplies each task's extra result
# arguments (e.g. `preprocessing`) from earlier tasks - confirm against tsad's Pipeline.
anomaly_pipeline = Pipeline([
    HighLevelDatasetAnalysisTask(),
    SklearnPreprocessingTask(),
    SklearnAnomalyTask()
])
# Fit every task first; the return value of this call is not used.
anomaly_pipeline.fit_predict(frame)
# Rebinds `anomaly_df`, which an earlier cell assigned from the standalone task.
anomaly_df = anomaly_pipeline.predict(frame)

anomaly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6405 entries, 0 to 6404
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   0       6405 non-null   int64
dtypes: int64(1)
memory usage: 50.2 KB


Create forecasting task with external scikit-learn LinearRegression

In [9]:
class SklearnForecastingTaskResult(TaskResult):
    """Result object carrying the model fitted by ``SklearnForecastingTask``."""

    # Fitted linear regression; the task assigns it after constructing the result.
    linear_regression: None

    def show(self) -> None:
        """Display nothing; this result has no visual representation."""
        return None


class SklearnForecastingTask(Task):
    """Wrap scikit-learn's ``LinearRegression`` as a TSAD task."""

    def fit_predict(self, df: pd.DataFrame) -> tuple[pd.DataFrame, TaskResult]:
        """Fit a ``LinearRegression`` that maps earlier rows of ``df`` to later rows.

        ``df`` is split in its original row order into a leading train part
        and a trailing test part (scikit-learn's default split sizes). The
        last ``len(test)`` rows of the train part are the features and the
        test rows are the targets.

        Args:
            df: Data to fit on, ordered in time.

        Returns:
            ``(df, result)`` where ``df`` is the input frame passed through
            unchanged and ``result.linear_regression`` is the fitted model.
        """
        from sklearn.linear_model import LinearRegression
        from sklearn.model_selection import train_test_split

        # shuffle=False keeps the rows in order. The default shuffles without a
        # seed, which pairs feature and target rows at random and makes the
        # fit differ on every run.
        train_df, test_df = train_test_split(df, shuffle=False)

        # Features and targets need the same number of rows, so take only the
        # rows of the train part that immediately precede the test part.
        features = train_df.tail(len(test_df))

        result = SklearnForecastingTaskResult()
        result.linear_regression = LinearRegression()
        result.linear_regression.fit(features, test_df)

        return df, result

    def predict(self, df: pd.DataFrame, anomaly: SklearnForecastingTaskResult) -> pd.DataFrame:
        """Run the fitted regression on ``df``.

        Args:
            df: Data to predict from; needs the same columns used for fitting.
            anomaly: Result produced by ``fit_predict`` (the parameter name is
                kept for interface compatibility; it holds the forecasting
                result, not an anomaly result).

        Returns:
            A DataFrame of predictions built from the raw output array, so it
            carries a default integer index and integer column labels.
        """
        forecast_values = anomaly.linear_regression.predict(df)
        return pd.DataFrame(forecast_values)

In [10]:
# Use the forecasting task directly, outside of a pipeline: fit the regression first.
forecasting_task = SklearnForecastingTask()
forecasting_df, forecasting_result = forecasting_task.fit_predict(frame)

# Predict on the same frame with the fitted model and summarize the output.
forecasting_task.predict(frame, forecasting_result).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6405 entries, 0 to 6404
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       6405 non-null   float64
 1   1       6405 non-null   float64
 2   2       6405 non-null   float64
 3   3       6405 non-null   float64
 4   4       6405 non-null   float64
 5   5       6405 non-null   float64
 6   6       6405 non-null   float64
 7   7       6405 non-null   float64
dtypes: float64(8)
memory usage: 400.4 KB


Create a forecasting pipeline that uses the external scikit-learn library

In [11]:
# Chain dataset analysis and forecasting into one pipeline.
forecasting_pipeline = Pipeline([
    HighLevelDatasetAnalysisTask(),
    SklearnForecastingTask()
])
# Fit every task first; the return value of this call is not used.
forecasting_pipeline.fit_predict(frame)
# Rebinds `forecasting_df`, which an earlier cell assigned from the standalone task.
forecasting_df = forecasting_pipeline.predict(frame)

forecasting_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6405 entries, 0 to 6404
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       6405 non-null   float64
 1   1       6405 non-null   float64
 2   2       6405 non-null   float64
 3   3       6405 non-null   float64
 4   4       6405 non-null   float64
 5   5       6405 non-null   float64
 6   6       6405 non-null   float64
 7   7       6405 non-null   float64
dtypes: float64(8)
memory usage: 400.4 KB
