In [1]:
from pathlib import Path
import os
from IPython.display import clear_output

# Locate the repository root: the closest directory, starting at the current
# working directory and moving upward, that contains a pyproject.toml file.
# When no such directory exists the current working directory is kept.
project_root = next(
    filter(
        lambda directory: (directory / "pyproject.toml").exists(),
        (Path.cwd(), *Path.cwd().parents),
    ),
    Path.cwd(),
)
# Relative paths used after this point resolve against that directory.
os.chdir(project_root)


In [None]:
from fits.data.download import DownloadDatasetAirQuality

# Invoke the project's air-quality download helper with its default arguments.
# Presumably this fetches the raw dataset to local storage — TODO confirm in
# fits.data.download.
DownloadDatasetAirQuality()



In [None]:
# Optional check (disabled): uncomment to read the PM2.5 CSV with a parsed
# "datetime" index and preview its first rows.
# import pandas as pd
# from fits.config import DatasetsPaths

# df = pd.read_csv(
#     DatasetsPaths.pm25.value,
#     index_col="datetime",
#     parse_dates=True,
# )

# df.head()


In [None]:
# Optional check (disabled): uncomment to build the training-mode dataset and
# display the first sample it yields.
# from fits.data.dataset import ModelMode, DatasetAirQuality

# dataset = DatasetAirQuality(ModelMode.train)

# for sample in dataset:
#     break
# sample


In [2]:
import torch
from fits.modelling.framework import Train, Evaluate
from fits.data.dataset import DatasetAirQuality
from fits.data.dataloader import ForecastingDataLoader


# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ForecastingDataLoader yields the train / validation / test loaders, in that order.
(train_loader, valid_loader, test_loader) = ForecastingDataLoader(DatasetAirQuality, batch_size=128)

# Normalization statistics are taken from the training dataset only.
normalization_stats = train_loader.dataset.normalization_stats


In [3]:
import math
import pickle
from pathlib import Path
import matplotlib.pyplot as plt


def visualize_forecast_sample(
    evaluation_dir: str | Path,
    nsample: int = 10,
    n_features: int = 36,
    sample_index: int = 0,
    ncols: int = 4,
    figsize=(24, 36),
) -> None:
    """
    Plot one subplot per feature for a single stored forecast sample.

    Reads ``generated_outputs_nsample{nsample}.pk`` from ``evaluation_dir`` and,
    for each plotted feature, draws the observed points, the median of the
    forecast draws and a shaded 10-90% quantile band. Forecast values are
    hidden wherever the forecast mask is zero.

    The stored forecasts are indexed as ``[sample, draw, time, feature]`` and
    the masks / observations as ``[sample, time, feature]``.

    Args:
        evaluation_dir: Directory that holds the pickled evaluation outputs.
        nsample: Value embedded in the pickle file name; it must match the
            value used when the outputs were written.
        n_features: Number of leading features to plot. Capped at the number
            of features present in the stored forecasts.
        sample_index: Index of the sample (first tensor axis) to plot.
            Negative indices count from the end.
        ncols: Maximum number of subplot columns.
        figsize: Figure size forwarded to ``plt.subplots``.

    Raises:
        ValueError: If ``n_features`` or ``ncols`` is smaller than 1, or the
            stored forecasts contain no features.
        IndexError: If ``sample_index`` is outside the stored samples.
        FileNotFoundError: If the expected pickle file does not exist.
    """
    if n_features < 1 or ncols < 1:
        raise ValueError(
            f"n_features and ncols must be >= 1 (got n_features={n_features}, ncols={ncols})"
        )

    evaluation_dir = Path(evaluation_dir)
    generated_path = evaluation_dir / f"generated_outputs_nsample{nsample}.pk"

    # SECURITY: unpickling can execute arbitrary code. Only point this at
    # files written by an evaluation run you trust.
    with open(generated_path, "rb") as f:
        (
            forecasted_data,
            forecast_mask,
            observed_data,
            observed_mask,
            time_points,
            _scaler_tensor,  # stored with the outputs but not used for plotting
            _mean_tensor,  # stored with the outputs but not used for plotting
        ) = pickle.load(f)

    forecasted_data = forecasted_data.cpu()
    forecast_mask = forecast_mask.cpu()
    observed_data = observed_data.cpu()
    observed_mask = observed_mask.cpu()
    time_points = time_points.cpu()

    # Fail early with a readable message instead of an opaque indexing error
    # halfway through plotting.
    n_stored = forecasted_data.shape[0]
    if not -n_stored <= sample_index < n_stored:
        raise IndexError(
            f"sample_index {sample_index} is out of range for {n_stored} stored samples"
        )

    # Never request more features than the stored forecasts provide.
    n_features = min(n_features, forecasted_data.shape[3])
    if n_features < 1:
        raise ValueError(f"{generated_path} contains no features to plot")

    time_axis = time_points[sample_index].numpy()

    ncols = min(ncols, n_features)
    nrows = math.ceil(n_features / ncols)

    # squeeze=False always returns a 2-D array of axes, even for a 1x1 grid,
    # so no special case is needed for a single subplot.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    # torch.quantile requires q to have the same dtype as its input.
    quantile_levels = torch.tensor([0.1, 0.9], dtype=forecasted_data.dtype)

    # ---- LOOP OVER FEATURES ----
    for ax, feat in zip(axes, range(n_features)):
        forecast_samples = forecasted_data[sample_index, :, :, feat]
        sample_mask = forecast_mask[sample_index, :, feat].bool()
        observed_series = observed_data[sample_index, :, feat]
        observed_series_mask = observed_mask[sample_index, :, feat].bool()

        # Median + intervals, reduced over the forecast-draw axis
        median = forecast_samples.median(dim=0).values
        lower, upper = torch.quantile(forecast_samples, quantile_levels, dim=0)

        # Blank out time steps that are not part of the forecast so they are
        # not drawn.
        median = median.masked_fill(~sample_mask, torch.nan)
        lower = lower.masked_fill(~sample_mask, torch.nan)
        upper = upper.masked_fill(~sample_mask, torch.nan)

        # Convert to NumPy for matplotlib
        obs_mask_np = observed_series_mask.numpy()
        obs_series_np = observed_series.numpy()
        sample_mask_np = sample_mask.numpy()
        median_np = median.numpy()
        lower_np = lower.numpy()
        upper_np = upper.numpy()

        # ---- PLOTTING INTO ax ----
        ax.scatter(
            time_axis[obs_mask_np],
            obs_series_np[obs_mask_np],
            color="black",
            s=10,
            label="Observed",
        )
        ax.plot(time_axis, median_np, label="Median", color="tab:green")
        ax.fill_between(
            time_axis,
            lower_np,
            upper_np,
            where=sample_mask_np,
            alpha=0.3,
            color="tab:green",
            # Escaped en dash: survives encoding round-trips of the notebook.
            label="10\u201390%",
        )

        ax.set_title(f"Feature {feat}")
        ax.set_xlabel("Time step")
        ax.set_ylabel("Value")
        ax.grid(True)
        ax.legend()

    # Turn off unused axes if any
    for ax in axes[n_features:]:
        ax.axis("off")

    plt.tight_layout()
    plt.show()


# CSDI

In [None]:
from fits.modelling.CSDI.adapter import CSDIAdapter


# Build a CSDI adapter with its default constructor arguments and move it to
# `device`.
csdi = CSDIAdapter().to(device)


In [None]:
# Run the project's Train routine on the CSDI adapter with the training and
# validation loaders. Where it writes checkpoints is defined in
# fits.modelling.framework — presumably a timestamped run directory; TODO confirm.
Train(csdi, train_loader, valid_loader)


In [None]:
# Restore previously trained CSDI weights from a saved checkpoint.
# NOTE(review): the path is relative to the current working directory and
# names one specific training run — confirm it still points at the intended
# checkpoint before re-running.
state = torch.load(
    "../data/models/training/CSDIAdapter_20251202_130631/model.pth",
    map_location=device,
    # The loaded object is only used as a state dict, so restrict unpickling
    # to tensors and plain containers instead of arbitrary Python objects.
    weights_only=True,
)

csdi.load_state_dict(state)
csdi.to(device)

# Hide whatever the steps above printed so the cell output stays empty.
clear_output()


In [None]:
# Evaluate the CSDI adapter on the test loader, passing the normalization
# statistics taken from the training dataset and nsample=5.
# Presumably nsample is the number of forecast draws per window and the
# results are written to an evaluation directory — TODO confirm in
# fits.modelling.framework.
Evaluate(csdi, test_loader, normalization_stats, nsample=5)


In [None]:
# Plot sample index 1 from one specific stored evaluation run. nsample is part
# of the pickle file name that is opened, so it has to match the value used
# when that run's outputs were written.
visualize_forecast_sample("../data/models/evaluation/CSDIAdapter_20251202_161117", nsample=5, sample_index=1)


# DiffusionTS

In [5]:
from fits.modelling.DiffusionTS.adapter import DiffusionTSAdapter


# Build a DiffusionTS adapter with its default constructor arguments and move
# it to `device`.
# NOTE(review): the recorded output of this cell is
# "TypeError: unsupported operand type(s) for %: 'NoneType' and 'int'", so this
# statement currently fails. Presumably a constructor default is None where an
# integer is required — confirm against DiffusionTSAdapter's signature.
diffusionts = DiffusionTSAdapter().to(device)


TypeError: unsupported operand type(s) for %: 'NoneType' and 'int'

In [None]:
# Run the project's Train routine on the DiffusionTS adapter with the training
# and validation loaders. Requires `diffusionts` to have been created
# successfully beforehand.
Train(diffusionts, train_loader, valid_loader)
