### Standard Benchmarks for temporal data

Imports

In [None]:
import pickle
import pandas as pd
from pathlib import Path
import argparse

from synthcity.utils.datasets.time_series.google_stocks import GoogleStocksDataloader
from synthcity.utils.datasets.time_series.sine import SineDataloader

from synthcity.plugins.core.dataloader import TimeSeriesDataLoader
from synthcity.benchmark import Benchmarks
import synthcity.logger as log

Configure logging and set the model parameters

In [None]:
# Register the "synthcity_logs" sink with synthcity's logger at INFO level
# (presumably a log file name; confirm against synthcity.logger.add).
log.add("synthcity_logs", "INFO")

# Keyword arguments handed to every benchmarked plugin.
KWARGS = {"n_iter": 100}
# Flattened "key:value" form of KWARGS, joined by "-", used to tag result files.
KWARGS_str = "-".join(f"{name}:{value}" for name, value in KWARGS.items())


Main functions

In [None]:
def run_dataset(loader, workspace_path, models):
    """Benchmark the given plugins on a single time-series dataset.

    Every entry of ``models`` is evaluated with the module-level ``KWARGS`` on
    ``loader.train()`` / ``loader.test()``. The requested synthetic size equals
    the number of rows of ``loader.dataframe()``.

    Args:
        loader: Data loader exposing ``train()``, ``test()`` and
            ``dataframe()``.
        workspace_path: Forwarded to ``Benchmarks.evaluate`` as ``workspace``.
        models: Iterable of plugin names; each name doubles as the label of
            its test case.

    Returns:
        The value returned by ``Benchmarks.evaluate``, or ``None`` when the
        evaluation (or printing its result) raised an exception.
    """
    import traceback

    metrics = {
        "stats": ["alpha_precision"],
        "detection": ["detection_xgb", "detection_mlp", "detection_linear"],
        "performance": [
            "linear_model",
            "mlp",
            "xgb",
        ],
    }

    try:
        score = Benchmarks.evaluate(
            [(model, model, KWARGS) for model in models],
            loader.train(),
            loader.test(),
            task_type="time_series",
            synthetic_size=loader.dataframe().shape[0],
            synthetic_reuse_if_exists=False,
            augmented_reuse_if_exists=False,
            metrics=metrics,
            workspace=workspace_path,
            repeats=1,
            device="cpu",
        )
        print(score)
    except Exception as e:
        # Deliberately best-effort: a failing benchmark must not abort the
        # caller. Print the full traceback as well as the message, otherwise
        # the origin of the failure is lost.
        print("\n\n", e)
        traceback.print_exc()
        score = None

    return score

def create_absolute_path(cwd, path):
    """Resolve ``path`` against ``cwd`` and drop repeated path components.

    If ``cwd`` is not named "tutorials", "tests" or "synthcity-benckmarking",
    ``path`` is resolved from the parent of ``cwd`` instead. After resolving,
    any "/"-separated component that already occurred earlier in the path is
    removed, so only the first occurrence of each component is kept.

    Args:
        cwd: Base directory as a ``Path``.
        path: Path to append to the base directory.

    Returns:
        The resulting ``Path``.
    """
    known_dirs = ("tutorials", "tests", "synthcity-benckmarking")
    base = cwd if cwd.name in known_dirs else cwd / ".."
    resolved = str((base / path).resolve())
    # dict.fromkeys keeps the first occurrence of each component, in order.
    unique_parts = dict.fromkeys(resolved.split("/"))
    return Path("/".join(unique_parts))
    
def run_synthcity(models=None, dataset_loader_name="sine", save=False):
    """Load a time-series dataset, benchmark plugins on it and report scores.

    The workspace and results directories are derived from the current
    working directory via ``create_absolute_path``; the results directory is
    created if it does not exist.

    Args:
        models: List of plugin names to benchmark. Defaults to
            ``["timegan"]`` when omitted.
        dataset_loader_name: Case-insensitive dataset name, either "sine" or
            "googlestocks".
        save: When true and the benchmark produced a score, pickle the score
            into the results directory.

    Raises:
        KeyError: If ``dataset_loader_name`` is not a known dataset.
    """
    # Avoid a mutable default argument shared between calls.
    if models is None:
        models = ["timegan"]

    dataset_loaders = {
        "sine": SineDataloader,
        "googlestocks": GoogleStocksDataloader,
    }

    # Validate the dataset name before touching the file system.
    dataset_key = dataset_loader_name.lower()
    if dataset_key not in dataset_loaders:
        raise KeyError(
            f"Unknown dataset loader {dataset_loader_name!r}; "
            f"expected one of {sorted(dataset_loaders)}"
        )

    cwd = Path.cwd()
    workspace_path = create_absolute_path(cwd, "../workspace/time_series/")
    result_path = create_absolute_path(cwd, "../results/time_series/")
    result_path.mkdir(parents=True, exist_ok=True)

    # Load and prep data
    (
        static_data,
        temporal_data,
        observation_times,
        outcome,
    ) = dataset_loaders[dataset_key]().load()

    loader = TimeSeriesDataLoader(
        temporal_data=temporal_data,
        observation_times=observation_times,
        static_data=static_data,
        outcome=outcome,
    )
    print(loader.dataframe().head())
    score = run_dataset(loader, workspace_path, models)

    # run_dataset returns None when the benchmark failed; nothing to report.
    if not score:
        return

    Benchmarks.print(score)
    Benchmarks.highlight(score)
    if save:
        # File name encodes the dataset, the plugins and the plugin kwargs.
        out_file = result_path / f"{dataset_key}-{'-'.join(models)}-{KWARGS_str}.pkl"
        with open(out_file, "wb") as f:
            pickle.dump(score, f)


In [None]:
# Benchmark the "timevae" plugin on the sine dataset; with save=False the
# scores are only printed, not pickled.
run_synthcity(models=["timevae"], dataset_loader_name="sine", save=False)