In [8]:
import sys
from pathlib import Path

# Pick the starting path: the script file when `__file__` is defined, otherwise
# the current working directory (a notebook kernel defines no `__file__`).
# ROOT is that path resolved to absolute form and moved two `.parent` steps up.
# NOTE(review): the two branches are not equivalent when cwd is the file's own
# directory -- from a file the result is one level above the file's directory,
# from cwd it is two levels above cwd. Confirm which level is intended.
ROOT = (Path(__file__) if "__file__" in globals() else Path()).resolve().parent.parent

# Register ROOT on the import path exactly once so re-running the cell does not
# append duplicates (presumably so the project-local imports below resolve).
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

In [11]:
from anomalysuite.data.random_data import RandomDataGenerator
from anomalysuite.reconstructor.tranad import TranAD
import lightning as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping


from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import TensorDataset
import pandas as pd
import torch

# Batch size passed to every DataLoader built in this notebook.
BATCH_SIZE = 64
# Number of consecutive rows in one sliding window (also passed to the model).
SEQUENCE_LENGTH = 50
# Backward-compatible alias for the original misspelled name, which the cells
# below still reference.
SEQUNECE_LENGTH = SEQUENCE_LENGTH
# Number of feature columns requested from the data generator and the model.
NUMBER_OF_FEATURES = 3

In [None]:
# Build the synthetic dataset with a fixed seed so reruns use the same inputs.
# NOTE(review): presumably `length` is the number of rows and `anomaly_length`
# the span of each injected anomaly -- confirm against RandomDataGenerator.
data_generator = RandomDataGenerator(
    length=10000,
    anomaly_length=100,
    features=NUMBER_OF_FEATURES,
    seed=42,
)
df = data_generator.get_data()
data_generator.plot()

In [None]:
# Positional (unshuffled) split of the rows: first 50% for training,
# the next 20% for validation, and the remaining 30% for prediction.
length = len(df)
df_train, df_val, df_pred = (
    df.iloc[: int(length * 0.5)],
    df.iloc[int(length * 0.5) : int(length * 0.7)],
    df.iloc[int(length * 0.7) :],
)


def df_to_dataloader(
    df: pd.DataFrame,
    batch_size: int,
    sequence_length: "int | None" = None,
    num_workers: int = 10,
) -> DataLoader:
    """Convert a pandas DataFrame into a torch DataLoader of sliding windows.

    The rows of ``df`` are cut into overlapping windows of ``sequence_length``
    consecutive rows with a stride of 1.
    for example) if df has 10000 rows and 3 columns: (10000, 3)
    ``unfold`` yields (9951, 3, 50) when the sequence length is 50, and the
    permute that follows reorders it to (9951, 50, 3).
    The dataloader therefore passes batches shaped (batch_size, 50, 3),
    i.e. (batch, position in window, column).

    Every dataset item is the pair ``(window, window)``: the same tensor is
    used as both input and target.

    Args:
        df: pandas DataFrame with numeric columns; one row per time step.
        batch_size: batch size for DataLoader.
        sequence_length: number of rows per window. Defaults to the
            notebook-level ``SEQUNECE_LENGTH`` constant when omitted.
        num_workers: number of DataLoader worker processes
            (0 loads in the main process).

    Returns:
        torch DataLoader iterating the windows in their original order.

    Raises:
        ValueError: if ``df`` has fewer rows than ``sequence_length``, so not
            even one full window can be built.

    """
    if sequence_length is None:
        # Fall back to the notebook-wide constant (original behaviour).
        sequence_length = SEQUNECE_LENGTH
    if len(df) < sequence_length:
        raise ValueError(
            f"DataFrame has {len(df)} rows but sequence_length is {sequence_length}; "
            "at least one full window is required."
        )

    # torch.tensor always copies and converts explicitly to float32.
    data_tensor = torch.tensor(df.values, dtype=torch.float32)
    # unfold over the row axis appends the window axis last: (windows, columns, size).
    rolled_tensor = data_tensor.unfold(0, sequence_length, 1)
    # Swap the last two axes so each window reads (size, columns).
    rolled_tensor = rolled_tensor.permute(0, 2, 1)
    dataset = TensorDataset(rolled_tensor, rolled_tensor)
    return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)


# Build one windowed DataLoader per split, in train / validation / prediction order.
train_dataloader, val_dataloader, pred_dataloader = (
    df_to_dataloader(split, batch_size=BATCH_SIZE) for split in (df_train, df_val, df_pred)
)

In [None]:
# Model sized to match the windows produced by the dataloaders above.
model = TranAD(
    sequence_length=SEQUNECE_LENGTH,
    number_of_features=NUMBER_OF_FEATURES,
    dim_feedforward=1024,
    num_layers=4,
)

# Early stopping watches "val_loss" with a patience of 5.
# NOTE(review): assumes TranAD logs a metric named "val_loss" -- confirm.
trainer = L.Trainer(
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=5),
    ],
)
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

In [None]:
# Run inference over the held-out split; the result is indexed per batch below.
pred = trainer.predict(model, dataloaders=pred_dataloader)

In [None]:
# Plot the first prediction next to the first input window.
# NOTE(review): assumes pred[0][0] is a 2-D (position, column) window from the
# first predicted batch -- confirm against TranAD's predict step.
first_pred_window = pred[0][0]
# Fetch only the first batch; materializing the whole loader with list(...)
# would iterate every batch just to read element 0.
first_input_window = next(iter(pred_dataloader))[0][0]
# `keys` labels the two column groups so the legend is not a set of duplicate
# 0/1/2 entries; the trailing `;` suppresses the Axes repr in the cell output.
pd.concat(
    [pd.DataFrame(first_pred_window), pd.DataFrame(first_input_window)],
    axis=1,
    keys=["pred", "input"],
).plot();