<summary>Table of Contents</summary>

- [1. Pretrain](#1-pretrain)
- [2. Finetune](#2-finetune)


This notebook runs the self-supervised experiments (masked pretraining followed by fine-tuning) for five countries: DE, GB, ES, FR and IT.
All parameters and settings are taken from the paper.

In [2]:
import pandas as pd
import subprocess
import os
import time
from utils.helper import extract_metrics_from_output, running_time

# 1. Pretrain 

The loss tracked during pretraining is MAE (mean absolute error).

In [3]:
# Dynamic variables
# GPU selection: the index is exported through CUDA_VISIBLE_DEVICES so that
# only this device is visible to CUDA.
cuda_device = "1"
os.environ.update(CUDA_VISIBLE_DEVICES=cuda_device)

# Experiment grid. Each country is paired with its context length; the two
# lists derived below are aligned index-by-index (seq_lens[i] belongs to
# countries[i]).
_context_by_country = {'DE': 512, 'GB': 512, 'ES': 336, 'FR': 168, 'IT': 168}
countries = list(_context_by_country)
seq_lens = list(_context_by_country.values())

# Prediction lengths are kept as strings.
pred_lens = [str(horizon) for horizon in (24, 96, 168)]

In [None]:
start = time.time()

countries = ['DE', 'GB', 'ES', 'FR', 'IT']
seq_lens = [512, 512, 336, 168, 168]
pred_lens = ["24", "96", "168"]

for i, country in enumerate(countries):
    for pred_len in pred_lens:
        if country == 'DE' and pred_len == '24':
            seq_len = 336
        else:
            seq_len = seq_lens[i]
            
        # Just use the same parameters as from the paper
        params = {
            "--context_points": seq_len,
            "--dset": country,
            "--mask_ratio": 0.4,
            "--scaler_type": "minmax",
            "--n_epochs_pretrain": 100,
            "--target_points": pred_len,
            "--batch_size": 128
        }

        # Build the command string
        command = "python PatchTST-main/PatchTST_self_supervised/patchtst_pretrain.py "

        # Add parameters to the command
        for key, value in params.items():
            if value is not None:
                command += f"{key} {value} "
            else:
                command += f"{key} "  # Add flags with no value

        # Execute the command
        print(f"==========Running command for {country}, pred_len {pred_len}:==========")
        !{command}
        
end = time.time()
hours, mins, secs = running_time(start, end)
print("Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs))

# 2. Finetune

Linear probing + full fine-tuning.

In [None]:
# Fine-tuning sweep: for every (country, prediction length) pair, run the
# fine-tuning script on the matching pretrained checkpoint, stream its output
# into the cell, and collect the reported test metrics in `data`.

# Dynamic variables
cuda_device = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device

countries = ['DE', 'GB', 'ES', 'FR', 'IT']
seq_lens = [512, 512, 336, 168, 168]  # aligned index-by-index with `countries`
pred_lens = ["24", "96", "168"]

start = time.time()

# List to store metrics for the DataFrame (one dict per successful run)
data = []

for i, country in enumerate(countries):
    for pred_len in pred_lens:
        # DE with pred_len 24 uses a context of 336 instead of its list entry.
        if country == 'DE' and pred_len == '24':
            seq_len = 336
        else:
            seq_len = seq_lens[i]

        # Define the parameters for each country and pred_len.
        # The checkpoint file name hard-codes patch12/stride12, 100 pretraining
        # epochs and mask ratio 0.4; the latter two mirror the values passed in
        # the pretraining cell and must be kept in sync with it.
        params = {
            "--dset": country,
            "--context_points": seq_len,
            "--is_finetune": 1,
            "--pretrained_model": f"saved_models/{country}/masked_patchtst/based_model/patchtst_pretrained_cw{seq_len}_patch12_stride12_epochs-pretrain100_mask0.4_model1.pth",
            "--scaler_type": "minmax",
            "--n_epochs_finetune": 20,
            "--target_points": pred_len,
            "--batch_size": 128

        }

        # Build the command string
        command = "python PatchTST-main/PatchTST_self_supervised/patchtst_finetune.py "

        # Add parameters to the command
        for key, value in params.items():
            if value is not None:
                command += f"{key} {value} "
            else:
                command += f"{key} "  # Add flags with no value

        print(f"==========Running command for {country}, pred_len {pred_len}:==========")

        # Run the command and capture the output in real time. stderr is merged
        # into stdout. The context manager closes the pipe and waits for the
        # process even if the loop below is interrupted.
        output = []
        with subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as process:
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell

        # The process has been waited for at this point, so returncode is set.
        if process.returncode != 0:
            print(f"\nError: fine-tuning exited with status {process.returncode} for {country}, pred_len {pred_len}")

        try:
            # Extract metrics from the output using the function
            metrics = extract_metrics_from_output(output, if_supervised=False)
            mse, rmse, mae = metrics[0]

            # Append the results to the data list
            data.append({
                'Country': country,
                'Pred_len': pred_len,
                'MSE': mse,
                'RMSE': rmse,
                'MAE': mae
            })
        except (ValueError, IndexError) as e:
            # IndexError covers `metrics[0]` on an empty result; whether the
            # helper can return one is not visible here. Either way the sweep
            # continues with the next run.
            print(f"\nError: {e} for {country}, pred_len {pred_len}")

end = time.time()
hours, mins, secs = running_time(start, end)
print("Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs))

In [7]:
# Build the results table from the metrics collected by the fine-tuning cell,
# save it as CSV and display it rounded to 4 decimals.

# Fail early with a clear message: without any collected metrics there is
# nothing to index, and an existing results file should not be touched.
if not data:
    raise ValueError("No fine-tuning metrics were collected; run the fine-tuning cell and check its output for errors.")

# Pin the column order explicitly so that the MultiIndex assigned below always
# lines up with the metric columns.
metric_names = ['MSE', 'RMSE', 'MAE']
df_self_sup = pd.DataFrame(data, columns=['Country', 'Pred_len'] + metric_names)
df_self_sup = df_self_sup.set_index(['Country', 'Pred_len'])
df_self_sup.columns = pd.MultiIndex.from_product([['Self-supervised'], metric_names], names=['Model', 'Metrics'])

# Create the output directory if it does not exist yet
path = "results/patchtst/"
os.makedirs(path, exist_ok=True)

# Save the results to a CSV file (unrounded); the rounded frame is only displayed
df_self_sup.to_csv(os.path.join(path, "finetune_results.csv"), index=True)
df_self_sup.round(4)

Unnamed: 0_level_0,Model,Self-supervised,Self-supervised,Self-supervised
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.0208,0.1443,0.0922
DE,96,0.0352,0.1875,0.1302
DE,168,0.037,0.1923,0.1359
GB,24,0.0248,0.1575,0.1047
GB,96,0.0402,0.2005,0.1417
GB,168,0.0427,0.2067,0.1478
ES,24,0.0095,0.0975,0.0604
ES,96,0.0181,0.1345,0.0888
ES,168,0.0204,0.1429,0.0948
FR,24,0.0097,0.0984,0.0557
