 # TTM zero-shot and few-shot benchmarking on multiple datasets

  **Using TTM-1024-96 model with Frequency Tuning.**

## Imports

In [1]:
import logging
import math
import warnings

import matplotlib.pyplot as plt
import pandas as pd
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from transformers import EarlyStoppingCallback, Trainer, TrainingArguments, set_seed

from tsfm_public import TinyTimeMixerForPrediction, TrackingCallback, count_parameters, load_dataset
from tsfm_public.toolkit.lr_finder import optimal_lr_finder
from tsfm_public.toolkit.visualization import plot_predictions


# Silence every Python warning so the long benchmark output stays readable.
warnings.filterwarnings("ignore")


# Request that the root logger only emit records at ERROR level or above.
# NOTE(review): the recorded run still prints INFO records (e.g. from load_dataset),
# so some other logging configuration appears to take precedence -- confirm.
logging.basicConfig(level=logging.ERROR)

2024-10-04 09:09:31.338304: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-10-04 09:09:31.388332: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  warn(f"Failed to load image Python extension: {e}")


## Important arguments

In [2]:
# Fix the random seed (via transformers' set_seed) so runs are repeatable.
# The same SEED is also passed to every TrainingArguments below.
SEED = 42
set_seed(SEED)

# Model parameters.
# context_length / forecast_length select the pretrained model branch and are
# passed to load_dataset(); freeze_backbone turns off gradients for the model
# backbone during few-shot fine-tuning; enable_prefix_tuning is forwarded as
# `use_frequency_token` to load_dataset() and to optimal_lr_finder().
context_length = 1024
forecast_length = 96
freeze_backbone = True
enable_prefix_tuning = True

# Training args: maximum number of fine-tuning epochs (early stopping may end
# training sooner) and the number of dataloader worker processes.
EPOCHS = 50
NUM_WORKERS = 16

# Make sure all the datasets in the following `list_datasets` are
# saved in the `DATA_ROOT_PATH` folder, or change the path accordingly.
# The path is handed to load_dataset() (imported from tsfm_public above)
# as `dataset_root_path`.
# NOTE(review): this is a site-specific absolute path -- adjust for your machine.
DATA_ROOT_PATH = "/dccstor/tsfm23/datasets/"

# Results (per-dataset sub-folders with checkpoints and plots, plus the summary
# CSV) are saved under this directory, relative to the working directory.
OUT_DIR = f"ttm_v2_freq_results_benchmark_{context_length}_{forecast_length}/"

## List of benchmark datasets (TTM was not pre-trained on any of these)

In [3]:
# Benchmarks are run in this order: the four ETT variants first, then the
# remaining multivariate datasets.
list_datasets = ["etth1", "etth2", "ettm1", "ettm2"] + ["weather", "electricity", "traffic"]

## Get model path

In [4]:
# TTM models for Only Research and Academic (Non-Commercial) Use are here: https://huggingface.co/ibm/ttm-research-r2
# The Hugging Face branch (revision) is derived from the context and forecast lengths.

hf_model_path = "ibm/ttm-research-r2"

# Reject unsupported context lengths up front.
if context_length not in (512, 1024, 1536):
    raise ValueError("Valid context lengths are: 512, 1024, and 1536 for now. Stay tuned for more TTM models.")

# The 512-context model lives on "main"; the longer-context models have their own branches.
hf_model_branch = "main" if context_length == 512 else f"{context_length}_{forecast_length}_ft_r2"

## Main benchmarking loop

In [5]:
# Results accumulator: one column per metric, each starting as its own empty
# list that receives one entry per benchmarked dataset.
all_results = {
    column: []
    for column in (
        "dataset",
        "zs_mse",
        "fs5_mse",
        "zs_eval_time",
        "fs5_mean_epoch_time",
        "fs5_total_train_time",
        "fs5_best_val_metric",
    )
}
# Loop over data.
# For every dataset: (1) evaluate the pretrained model zero-shot on the test split,
# (2) fine-tune it on a small fraction of the training split and evaluate again.
# Metrics are appended to `all_results`, and the summary CSV is rewritten after each
# dataset so finished rows are already on disk while the remaining datasets run.
for DATASET in list_datasets:
    print()
    print("=" * 100)
    print(
        f"Running zero-shot/few-shot for TTM-{context_length} on dataset = {DATASET}, forecast_len = {forecast_length}"
    )
    print(f"Model will be loaded from {hf_model_path}/{hf_model_branch}")
    # Per-dataset output folder (checkpoints, logs and plots)
    SUBDIR = f"{OUT_DIR}/{DATASET}"

    # Set batch size (traffic and electricity use smaller batches)
    if DATASET == "traffic":
        BATCH_SIZE = 8
    elif DATASET == "electricity":
        BATCH_SIZE = 32
    else:
        BATCH_SIZE = 64

    # Data prep: Get dataset (only the test split is needed for zero-shot)
    _, _, dset_test = load_dataset(
        DATASET,
        context_length,
        forecast_length,
        dataset_root_path=DATA_ROOT_PATH,
        use_frequency_token=enable_prefix_tuning,
    )

    #############################################################
    ##### Use the pretrained model in zero-shot forecasting #####
    #############################################################
    # Load model
    zeroshot_model = TinyTimeMixerForPrediction.from_pretrained(hf_model_path, revision=hf_model_branch)

    # zeroshot_trainer: the Trainer is used for evaluation only, no training happens here
    zeroshot_trainer = Trainer(
        model=zeroshot_model,
        args=TrainingArguments(
            output_dir=f"{SUBDIR}/zeroshot",
            per_device_eval_batch_size=BATCH_SIZE,
            seed=SEED,
        ),
        eval_dataset=dset_test,
    )

    # evaluate = zero-shot performance
    print("+" * 20, "Test MSE zero-shot", "+" * 20)
    zeroshot_output = zeroshot_trainer.evaluate(dset_test)
    print(zeroshot_output)
    print("+" * 60)
    all_results["zs_eval_time"].append(zeroshot_output["eval_runtime"])

    # Plot
    plot_predictions(
        model=zeroshot_trainer.model,
        dset=dset_test,
        plot_dir=SUBDIR,
        num_plots=10,
        plot_prefix="test_zeroshot",
        channel=0,
    )
    plt.close()

    # write results
    all_results["dataset"].append(DATASET)
    all_results["zs_mse"].append(zeroshot_output["eval_loss"])

    ############################################################
    ##### Use the pretrained model in few-shot forecasting #####
    ############################################################
    # Only the 5% setting is benchmarked. `all_results` and the summary print below
    # only have `fs5_*` columns, so add matching keys before extending this list.
    for fewshot_percent in [5]:
        # Set learning rate
        learning_rate = None  # `None` value indicates that the optimal_lr_finder() will be used

        print("-" * 20, f"Running few-shot {fewshot_percent}%", "-" * 20)
        # Data prep: Get dataset (training split reduced to the few-shot fraction)
        dset_train, dset_val, dset_test = load_dataset(
            DATASET,
            context_length,
            forecast_length,
            fewshot_fraction=fewshot_percent / 100,
            dataset_root_path=DATA_ROOT_PATH,
            use_frequency_token=enable_prefix_tuning,
        )

        # change head dropout to 0.7 for ett datasets; other datasets keep the
        # value stored in the pretrained configuration
        model_kwargs = {"head_dropout": 0.7} if "ett" in DATASET else {}
        finetune_forecast_model = TinyTimeMixerForPrediction.from_pretrained(
            hf_model_path, revision=hf_model_branch, **model_kwargs
        )

        if freeze_backbone:
            print(
                "Number of params before freezing backbone",
                count_parameters(finetune_forecast_model),
            )

            # Freeze the backbone of the model
            for param in finetune_forecast_model.backbone.parameters():
                param.requires_grad = False

            # Count params
            print(
                "Number of params after freezing the backbone",
                count_parameters(finetune_forecast_model),
            )

        if learning_rate is None:
            # The finder returns the suggested LR together with the model to continue with
            learning_rate, finetune_forecast_model = optimal_lr_finder(
                finetune_forecast_model,
                dset_train,
                batch_size=BATCH_SIZE,
                enable_prefix_tuning=enable_prefix_tuning,
            )
            print("OPTIMAL SUGGESTED LEARNING RATE =", learning_rate)

        print(f"Using learning rate = {learning_rate}")
        finetune_forecast_args = TrainingArguments(
            output_dir=f"{SUBDIR}/fewshot_{fewshot_percent}",
            overwrite_output_dir=True,
            learning_rate=learning_rate,
            num_train_epochs=EPOCHS,
            do_eval=True,
            evaluation_strategy="epoch",
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            dataloader_num_workers=NUM_WORKERS,
            # NOTE(review): in transformers 4.x `report_to=None` resolves to "all" installed
            # integrations rather than disabling reporting (the recorded run shows an
            # EmissionsTracker job active); pass "none" if no reporting is wanted -- confirm.
            report_to=None,
            save_strategy="epoch",
            logging_strategy="epoch",
            save_total_limit=1,
            logging_dir=f"{SUBDIR}/fewshot_{fewshot_percent}",  # Make sure to specify a logging directory
            load_best_model_at_end=True,  # Load the best model when training ends
            metric_for_best_model="eval_loss",  # Metric to monitor for early stopping
            greater_is_better=False,  # For loss
            seed=SEED,
        )

        # Create the early stopping callback
        early_stopping_callback = EarlyStoppingCallback(
            early_stopping_patience=10,  # Number of epochs with no improvement after which to stop
            early_stopping_threshold=0.0,  # Minimum improvement required to consider as improvement
        )
        # Supplies the timing / best-validation values written to `all_results` below
        tracking_callback = TrackingCallback()

        # Optimizer and scheduler
        optimizer = AdamW(finetune_forecast_model.parameters(), lr=learning_rate)
        # One-cycle schedule peaking at `learning_rate`, sized for EPOCHS full epochs.
        # steps_per_epoch counts batches of BATCH_SIZE over the few-shot training set
        # (assumes a single device and no gradient accumulation -- TODO confirm).
        scheduler = OneCycleLR(
            optimizer,
            learning_rate,
            epochs=EPOCHS,
            steps_per_epoch=math.ceil(len(dset_train) / (BATCH_SIZE)),
        )

        finetune_forecast_trainer = Trainer(
            model=finetune_forecast_model,
            args=finetune_forecast_args,
            train_dataset=dset_train,
            eval_dataset=dset_val,
            callbacks=[early_stopping_callback, tracking_callback],
            optimizers=(optimizer, scheduler),
        )

        # Fine tune
        finetune_forecast_trainer.train()

        # Evaluation on the test split
        print(
            "+" * 20,
            f"Test MSE after few-shot {fewshot_percent}% fine-tuning",
            "+" * 20,
        )
        fewshot_output = finetune_forecast_trainer.evaluate(dset_test)
        print(fewshot_output)
        print("+" * 60)

        # Plot
        plot_predictions(
            model=finetune_forecast_trainer.model,
            dset=dset_test,
            plot_dir=SUBDIR,
            num_plots=10,
            plot_prefix=f"test_fewshot_{fewshot_percent}",
            channel=0,
        )
        plt.close()

        # write results
        all_results[f"fs{fewshot_percent}_mse"].append(fewshot_output["eval_loss"])
        all_results[f"fs{fewshot_percent}_mean_epoch_time"].append(tracking_callback.mean_epoch_time)
        all_results[f"fs{fewshot_percent}_total_train_time"].append(tracking_callback.total_train_time)
        all_results[f"fs{fewshot_percent}_best_val_metric"].append(tracking_callback.best_eval_metric)

    # Rebuild the summary table from everything collected so far and save it once
    # per dataset (the CSV is overwritten on every iteration).
    df_out = pd.DataFrame(all_results).round(3)
    print(df_out[["dataset", "zs_mse", "fs5_mse"]])
    df_out.to_csv(f"{OUT_DIR}/results_zero_few.csv")

INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: etth1, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 7521, val = 2785, test = 2785



Running zero-shot/few-shot for TTM-1024 on dataset = etth1, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/12.5M [00:00<?, ?B/s]

INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.36299267411231995, 'eval_model_preparation_time': 0.0029, 'eval_runtime': 4.4622, 'eval_samples_per_second': 624.135, 'eval_steps_per_second': 9.861}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: etth1, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 285, val = 2785, test = 2785


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00020565123083486514
OPTIMAL SUGGESTED LEARNING RATE = 0.00020565123083486514
Using learning rate = 0.00020565123083486514


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,1.4525,0.679759
2,1.3761,0.679738
3,1.2649,0.679831
4,1.1936,0.680443
5,0.9554,0.682166
6,0.7657,0.685723
7,0.6671,0.690811
8,0.5055,0.694354
9,0.4237,0.695601
10,0.3655,0.694273


INFO:p-854016:t-23177196345088:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:10:04 EDT)" (scheduled at 2024-10-04 09:10:04.201164-04:00)
INFO:p-854016:t-23177196345088:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:10:19 EDT)" executed successfully
INFO:p-854016:t-23177196345088:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:10:34 EDT)" (scheduled at 2024-10-04 09:10:19.201164-04:00)
INFO:p-854016:t-23177196345088:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:10:34 EDT)" executed successfully
INFO:p-854016:t-23192246899456:base.py:shutdown:Scheduler has been shut down
ERROR:p-854016:t-23192246899456:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 1.0863359371821086 seconds, Total Train Time = 34.31828022003174
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.36278820037841797, 'eval_runtime': 1.4201, 'eval_samples_per_second': 1961.099, 'eval_steps_per_second': 30.983, 'epoch': 12.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 7521, val = 2785, test = 2785


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.363

Running zero-shot/few-shot for TTM-1024 on dataset = etth2, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.2709115445613861, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 1.4158, 'eval_samples_per_second': 1967.019, 'eval_steps_per_second': 31.077}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: etth2, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 285, val = 2785, test = 2785


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.000298364724028334
OPTIMAL SUGGESTED LEARNING RATE = 0.000298364724028334
Using learning rate = 0.000298364724028334


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,1.0756,0.228444
2,1.0394,0.229198
3,0.9167,0.230436
4,0.8349,0.232362
5,0.6684,0.235177
6,0.6088,0.23878
7,0.487,0.243153
8,0.4232,0.249117
9,0.3742,0.259001
10,0.3442,0.276216


INFO:p-854016:t-23185549661952:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:10:46 EDT)" (scheduled at 2024-10-04 09:10:46.693758-04:00)
INFO:p-854016:t-23185549661952:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:11:01 EDT)" executed successfully
INFO:p-854016:t-23192246899456:base.py:shutdown:Scheduler has been shut down
ERROR:p-854016:t-23192246899456:emissions.py:get_private_infra_emissions:Region:  not found for Country with ISO CODE : USA


[TrackingCallback] Mean Epoch Time = 1.1077192696658047 seconds, Total Train Time = 32.47017168998718
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.2712791860103607, 'eval_runtime': 1.5242, 'eval_samples_per_second': 1827.234, 'eval_steps_per_second': 28.868, 'epoch': 11.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 33441, val = 11425, test = 11425


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.363
1   etth2   0.271    0.271

Running zero-shot/few-shot for TTM-1024 on dataset = ettm1, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.32694563269615173, 'eval_model_preparation_time': 0.0024, 'eval_runtime': 4.6754, 'eval_samples_per_second': 2443.632, 'eval_steps_per_second': 38.285}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: ettm1, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 1581, val = 11425, test = 11425


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00043287612810830566
OPTIMAL SUGGESTED LEARNING RATE = 0.00043287612810830566
Using learning rate = 0.00043287612810830566


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.8403,0.408443
2,0.6226,0.413902
3,0.4498,0.420073
4,0.3279,0.420382
5,0.2868,0.412459
6,0.2613,0.427272
7,0.2434,0.439357
8,0.2283,0.436092
9,0.2143,0.454617
10,0.2018,0.466312


INFO:p-854016:t-23177226843904:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:11:30 EDT)" (scheduled at 2024-10-04 09:11:30.859561-04:00)
INFO:p-854016:t-23177226843904:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:11:45 EDT)" executed successfully
INFO:p-854016:t-23177226843904:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:00 EDT)" (scheduled at 2024-10-04 09:11:45.859561-04:00)
INFO:p-854016:t-23177226843904:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:00 EDT)" executed successfully
INFO:p-854016:t-23177226843904:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:15 EDT)" (scheduled at 2024-10-04 09:12:00.859561-04:00)
INFO:p-854016:t-231772268439

[TrackingCallback] Mean Epoch Time = 1.4592871015722102 seconds, Total Train Time = 48.18804407119751
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.3278079628944397, 'eval_runtime': 2.4712, 'eval_samples_per_second': 4623.251, 'eval_steps_per_second': 72.434, 'epoch': 11.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 33441, val = 11425, test = 11425


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.363
1   etth2   0.271    0.271
2   ettm1   0.327    0.328

Running zero-shot/few-shot for TTM-1024 on dataset = ettm2, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.1779371052980423, 'eval_model_preparation_time': 0.0026, 'eval_runtime': 4.4828, 'eval_samples_per_second': 2548.632, 'eval_steps_per_second': 39.93}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: ettm2, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 1581, val = 11425, test = 11425


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.0002477076355991711
OPTIMAL SUGGESTED LEARNING RATE = 0.0002477076355991711
Using learning rate = 0.0002477076355991711


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.5245,0.122229
2,0.4148,0.123283
3,0.3102,0.125309
4,0.212,0.12854
5,0.1597,0.133482
6,0.142,0.138576
7,0.1337,0.137888
8,0.1281,0.140013
9,0.1216,0.141608
10,0.1168,0.148989


INFO:p-854016:t-23185409140480:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:31 EDT)" (scheduled at 2024-10-04 09:12:31.605868-04:00)
INFO:p-854016:t-23185409140480:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:12:46 EDT)" executed successfully
INFO:p-854016:t-23185409140480:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:13:01 EDT)" (scheduled at 2024-10-04 09:12:46.605868-04:00)
INFO:p-854016:t-23185409140480:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:13:01 EDT)" executed successfully
INFO:p-854016:t-23185409140480:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:13:16 EDT)" (scheduled at 2024-10-04 09:13:01.605868-04:00)
INFO:p-854016:t-231854091404

[TrackingCallback] Mean Epoch Time = 1.4911205118352717 seconds, Total Train Time = 47.828335762023926
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.1781630963087082, 'eval_runtime': 2.6238, 'eval_samples_per_second': 4354.33, 'eval_steps_per_second': 68.221, 'epoch': 11.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96


  dataset  zs_mse  fs5_mse
0   etth1   0.363    0.363
1   etth2   0.271    0.271
2   ettm1   0.327    0.328
3   ettm2   0.178    0.178

Running zero-shot/few-shot for TTM-1024 on dataset = weather, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 35768, val = 5175, test = 10444
INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.16557331383228302, 'eval_model_preparation_time': 0.0025, 'eval_runtime': 7.5397, 'eval_samples_per_second': 1385.208, 'eval_steps_per_second': 21.752}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: weather, context length: 1024, prediction length 96
INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 1698, val = 5175, test = 10444


-------------------- Running few-shot 5% --------------------
Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00020565123083486514
OPTIMAL SUGGESTED LEARNING RATE = 0.00020565123083486514
Using learning rate = 0.00020565123083486514


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.1612,0.385808
2,0.1538,0.38319
3,0.1459,0.382595
4,0.135,0.382253
5,0.1231,0.385421
6,0.1105,0.384698
7,0.1019,0.380126
8,0.0956,0.385159
9,0.0908,0.389009
10,0.0864,0.386302


INFO:p-854016:t-23177466210048:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:13:36 EDT)" (scheduled at 2024-10-04 09:13:36.934582-04:00)
INFO:p-854016:t-23177466210048:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:13:51 EDT)" executed successfully
INFO:p-854016:t-23177466210048:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:13:51 EDT)" executed successfully
INFO:p-854016:t-23177466210048:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:14:06 EDT)" (scheduled at 2024-10-04 09:13:51.934582-04:00)
INFO:p-854016:t-23177466210048:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:14:06 EDT)" executed successfully
INFO:p-854016:t-23177466210048:base.py:run_job:Running job "Em

[TrackingCallback] Mean Epoch Time = 2.039622292799108 seconds, Total Train Time = 86.71302151679993
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.16545218229293823, 'eval_runtime': 4.3782, 'eval_samples_per_second': 2385.457, 'eval_steps_per_second': 37.458, 'epoch': 17.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96


   dataset  zs_mse  fs5_mse
0    etth1   0.363    0.363
1    etth2   0.271    0.271
2    ettm1   0.327    0.328
3    ettm2   0.178    0.178
4  weather   0.166    0.165

Running zero-shot/few-shot for TTM-1024 on dataset = electricity, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 17293, val = 2537, test = 5165
INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.15667933225631714, 'eval_model_preparation_time': 0.0026, 'eval_runtime': 32.0456, 'eval_samples_per_second': 161.177, 'eval_steps_per_second': 5.055}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: electricity, context length: 1024, prediction length 96


-------------------- Running few-shot 5% --------------------


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 774, val = 2537, test = 5165


Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 5.590810182512223e-05
OPTIMAL SUGGESTED LEARNING RATE = 5.590810182512223e-05
Using learning rate = 5.590810182512223e-05


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.154,0.132241
2,0.1504,0.132691
3,0.148,0.132196
4,0.1469,0.13072
5,0.1447,0.130376
6,0.1436,0.129266
7,0.1411,0.128518
8,0.1406,0.127543
9,0.1387,0.126815
10,0.1363,0.125934


INFO:p-854016:t-23177213200128:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:16:01 EDT)" (scheduled at 2024-10-04 09:16:01.931082-04:00)
INFO:p-854016:t-23177213200128:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:16:16 EDT)" executed successfully
INFO:p-854016:t-23177213200128:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:16:31 EDT)" (scheduled at 2024-10-04 09:16:16.931082-04:00)
INFO:p-854016:t-23177213200128:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:16:31 EDT)" executed successfully
INFO:p-854016:t-23177213200128:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:16:46 EDT)" (scheduled at 2024-10-04 09:16:31.931082-04:00)
INFO:p-854016:t-231772132001

[TrackingCallback] Mean Epoch Time = 5.812682814598084 seconds, Total Train Time = 842.4421577453613
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.14788587391376495, 'eval_runtime': 20.3053, 'eval_samples_per_second': 254.367, 'eval_steps_per_second': 7.978, 'epoch': 50.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96


       dataset  zs_mse  fs5_mse
0        etth1   0.363    0.363
1        etth2   0.271    0.271
2        ettm1   0.327    0.328
3        ettm2   0.178    0.178
4      weather   0.166    0.165
5  electricity   0.157    0.148

Running zero-shot/few-shot for TTM-1024 on dataset = traffic, forecast_len = 96
Model will be loaded from ibm/ttm-research-r2/1024_96_ft_r2


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 11161, val = 1661, test = 3413
INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


++++++++++++++++++++ Test MSE zero-shot ++++++++++++++++++++


{'eval_loss': 0.4755541682243347, 'eval_model_preparation_time': 0.0025, 'eval_runtime': 64.835, 'eval_samples_per_second': 52.641, 'eval_steps_per_second': 6.586}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Dataset name: traffic, context length: 1024, prediction length 96


-------------------- Running few-shot 5% --------------------


INFO:p-854016:t-23192246899456:data_handling.py:load_dataset:Data lengths: train = 467, val = 1661, test = 3413


Number of params before freezing backbone 3126076
Number of params after freezing the backbone 980178
LR Finder: Running learning rate (LR) finder algorithm. If the suggested LR is very low, we suggest setting the LR manually.
LR Finder: Using GPU:0.


INFO:p-854016:t-23192246899456:base.py:add_job:Adding job tentatively -- it will be properly scheduled when the scheduler starts


LR Finder: Suggested learning rate = 0.00017073526474706903
OPTIMAL SUGGESTED LEARNING RATE = 0.00017073526474706903
Using learning rate = 0.00017073526474706903


INFO:p-854016:t-23192246899456:base.py:_real_add_job:Added job "EmissionsTracker._measure_power" to job store "default"
INFO:p-854016:t-23192246899456:base.py:start:Scheduler started


Epoch,Training Loss,Validation Loss
1,0.3112,0.385815
2,0.2966,0.38457
3,0.2891,0.38335
4,0.2794,0.380428
5,0.2708,0.378043
6,0.2643,0.376897
7,0.2598,0.371791
8,0.2551,0.367791
9,0.2491,0.366351
10,0.2472,0.360846


INFO:p-854016:t-23177200080640:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:31:50 EDT)" (scheduled at 2024-10-04 09:31:50.993797-04:00)
INFO:p-854016:t-23177200080640:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:32:05 EDT)" executed successfully
INFO:p-854016:t-23177200080640:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:32:20 EDT)" (scheduled at 2024-10-04 09:32:05.993797-04:00)
INFO:p-854016:t-23177200080640:base.py:run_job:Job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:32:20 EDT)" executed successfully
INFO:p-854016:t-23177200080640:base.py:run_job:Running job "EmissionsTracker._measure_power (trigger: interval[0:00:15], next run at: 2024-10-04 09:32:35 EDT)" (scheduled at 2024-10-04 09:32:20.993797-04:00)
INFO:p-854016:t-231772000806

[TrackingCallback] Mean Epoch Time = 8.869987984092868 seconds, Total Train Time = 1373.7280249595642
++++++++++++++++++++ Test MSE after few-shot 5% fine-tuning ++++++++++++++++++++


{'eval_loss': 0.41427138447761536, 'eval_runtime': 37.1621, 'eval_samples_per_second': 91.841, 'eval_steps_per_second': 11.49, 'epoch': 49.0}
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       dataset  zs_mse  fs5_mse
0        etth1   0.363    0.363
1        etth2   0.271    0.271
2        ettm1   0.327    0.328
3        ettm2   0.178    0.178
4      weather   0.166    0.165
5  electricity   0.157    0.148
6      traffic   0.476    0.414


## Benchmarking results*

*Slight differences from the results reported in the TTM paper are possible due to different training environments.

In [6]:
# Show the full results table built in the benchmarking loop (one row per dataset).
df_out

Unnamed: 0,dataset,zs_mse,fs5_mse,zs_eval_time,fs5_mean_epoch_time,fs5_total_train_time,fs5_best_val_metric
0,etth1,0.363,0.363,4.462,1.086,34.318,0.68
1,etth2,0.271,0.271,1.416,1.108,32.47,0.228
2,ettm1,0.327,0.328,4.675,1.459,48.188,0.408
3,ettm2,0.178,0.178,4.483,1.491,47.828,0.122
4,weather,0.166,0.165,7.54,2.04,86.713,0.38
5,electricity,0.157,0.148,32.046,5.813,842.442,0.118
6,traffic,0.476,0.414,64.835,8.87,1373.728,0.345
