<details>
<summary>Table of Contents</summary>

- [1. No RevIN](#1-no-revin-instanse-normalization)
- [2. No channel-independence (Channel-Mixing)](#2-no-channel-independence-channel-mixing)
- [3. No channel-independence (Channel-Mixing) & No RevIN](#3-no-channel-independence-channel-mixing-and-no-revin)
- [3. No Patching](#4-no-patching)


</details>

Ablation study on PatchTST components.

In [1]:
import os
import pandas as pd
import subprocess
import shutil
import time
from utils.helper import extract_metrics_from_output, convert_results_into_df, running_time

# 1. No RevIN (Instanse Normalization)

In [3]:
# Paths to files and data
data_path = os.getcwd() + "/datasets/"

script_path = "./PatchTST-main/PatchTST_supervised/run_longExp.py"

log_dir = f"logs/patchtst/"
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

In [4]:
cuda_device = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device

# Dynamic variables
pred_lens = [24, 96, 168]
countries = ['DE', 'GB', 'ES', 'FR', 'IT']
num_cols = [5, 5, 3, 3, 3]
seq_len = 336
model = "PatchTST"
loss = "MAE"
itr=2

# Log file with all the results in 1 file
log_file_path = f"{log_dir}/{model}_no_revin.log"

# Parameters for tuning,but default
lr = 0.0001
n_heads = 16
e_layers = 3
d_model = 128
d_ff = 256
dropout = 0.2
patch_len = 32
stride = 16
batch_size = 128

# List to store the results
patchtst_results = []

In [5]:
start = time.time()

# Log file
with open(log_file_path, "w") as log_file:

    for i, country in enumerate(countries):
        
        country_start = time.time()
        statement_1 = f"\n=== Starting experiments for country: {country} ===\n"
        log_file.write(statement_1)
        print(statement_1)

        for pred_len in pred_lens:

            pred_len_start = time.time()
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2) 
            model_id = f"{country}_{seq_len}_{pred_len}_{country}"
            dataset = f"{country}_data.csv"
            
            # Arguments for the command
            command = f"""
            python {script_path} \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --pred_len {pred_len} \
              --e_layers {e_layers} \
              --factor 1 \
              --enc_in {num_cols[i]} \
              --c_out {num_cols[i]} \
              --des 'Exp' \
              --train_epochs 20 \
              --patience 5 \
              --n_heads {n_heads} \
              --d_model {d_model} \
              --d_ff {d_ff} \
              --dropout {dropout} \
              --fc_dropout {dropout} \
              --patch_len {patch_len} \
              --stride {stride} \
              --overlapping_windows \
              --scaler_type minmax \
              --if_relu \
              --loss_fnc {loss} \
              --revin 0 \
              --itr {itr} --batch_size {batch_size} --learning_rate "{lr}"
            """

            # Log the country and prediction length
            log_file.write(f"\n--- Running model for {country}, pred_len={pred_len} ---\n")

            # Run the command and capture the output
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

            # Capture the output in real-time
            output = []
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell
                log_file.write(line)  # Write to the log file

            process.wait()  # Wait for the process to finish
            shutil.rmtree('./checkpoints' )  # delete checkpoint files

            # Extract metrics for each iteration
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, scaled_metrics in enumerate(iteration_metrics, start=1):

                patchtst_results.append({
                    'Country': country,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': scaled_metrics[0],
                    'RMSE': scaled_metrics[1],
                    'MAE': scaled_metrics[2],
                    })
                
            pred_len_end = time.time()
            hours_int, mins_int, secs_int = running_time(pred_len_start, pred_len_end)
            statement_3 = "Intermediate time for {} and pred_len {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, pred_len, hours_int, mins_int, secs_int)
            log_file.write(statement_3)
            print(statement_3)

        country_end = time.time()
        hours_c, mins_c, secs_c = running_time(country_start, country_end)
        statement_4 = "Intermediate time for {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, hours_c, mins_c, secs_c)
        log_file.write(statement_4)
        print(statement_4)

    end = time.time()
    hours, mins, secs = running_time(start, end)
    statement_5 = "Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs)
    log_file.write(statement_5)
    print(statement_5)


=== Starting experiments for country: DE ===


=== Starting experiments for pred_len: 24 ===

Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_336_24_DE', model='PatchTST', data='custom', root_path='/vol/fob-vol3/nebenf24/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, scaler_type='minmax', if_relu=True, channel_mixing=0, seq_len=336, label_len=48, pred_len=24, inverse=False, loss_fnc='MAE', fc_dropout=0.2, head_dropout=0.0, patch_len=32, stride=16, padding_patch='end', revin=0, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=7, c_out=5, d_model=128, n_heads=16, e_layers=3, d_layers=1, d_ff=256, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=20, batch_size=128, patience=5, learning_rate=0.00

In [6]:
shutil.rmtree("results_transformers") # we do not need this directory and results anymore. If you need - comment this line

path = 'results/patchtst'
patchtst_df = convert_results_into_df(patchtst_results, if_loss_fnc=False)

if not os.path.exists(path):
    os.makedirs(path)

# Final DF
patchtst_df.columns = pd.MultiIndex.from_product([['-RevIN'], ['MSE','RMSE', 'MAE']], names=['Model', 'Metrics'])
patchtst_df.to_csv(os.path.join(path, 'patchtst_no_revin.csv'))
patchtst_df.round(4)

Unnamed: 0_level_0,Model,-RevIN,-RevIN,-RevIN
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.022,0.1484,0.0926
DE,96,0.0415,0.2037,0.1336
DE,168,0.0448,0.2116,0.1411
ES,24,0.0202,0.1422,0.0838
ES,96,0.03,0.1727,0.1122
ES,168,0.0293,0.1712,0.1158
FR,24,0.0112,0.1058,0.0617
FR,96,0.0208,0.1441,0.0848
FR,168,0.0242,0.1556,0.0917
GB,24,0.0284,0.1684,0.1077


# 2. No channel independence (Channel-Mixing)

In [7]:
# Log file with all the results in 1 file
log_file_path = f"{log_dir}/{model}_channel_mixing.log"

start = time.time()

# Log file
with open(log_file_path, "w") as log_file:

    for i, country in enumerate(countries):
        
        country_start = time.time()
        statement_1 = f"\n=== Starting experiments for country: {country} ===\n"
        log_file.write(statement_1)
        print(statement_1)

        for pred_len in pred_lens:

            pred_len_start = time.time()
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2) 
            model_id = f"{country}_{seq_len}_{pred_len}_{country}"
            dataset = f"{country}_data.csv"
            
            # Arguments for the command
            command = f"""
            python {script_path} \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --pred_len {pred_len} \
              --e_layers {e_layers} \
              --factor 1 \
              --enc_in {num_cols[i]} \
              --c_out {num_cols[i]} \
              --des 'Exp' \
              --train_epochs 20 \
              --patience 5 \
              --n_heads {n_heads} \
              --d_model {d_model} \
              --d_ff {d_ff} \
              --dropout {dropout} \
              --fc_dropout {dropout} \
              --patch_len {patch_len} \
              --stride {stride} \
              --overlapping_windows \
              --scaler_type minmax \
              --if_relu \
              --loss_fnc {loss} \
              --channel_mixing 1 \
              --itr {itr} --batch_size {batch_size} --learning_rate "{lr}"
            """

            # Log the country and prediction length
            log_file.write(f"\n--- Running model for {country}, pred_len={pred_len} ---\n")

            # Run the command and capture the output
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

            # Capture the output in real-time
            output = []
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell
                log_file.write(line)  # Write to the log file

            process.wait()  # Wait for the process to finish
            shutil.rmtree('./checkpoints' )  # delete checkpoint files

            # Extract metrics for each iteration
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, scaled_metrics in enumerate(iteration_metrics, start=1):

                patchtst_results.append({
                    'Country': country,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': scaled_metrics[0],
                    'RMSE': scaled_metrics[1],
                    'MAE': scaled_metrics[2],
                    })
                
            pred_len_end = time.time()
            hours_int, mins_int, secs_int = running_time(pred_len_start, pred_len_end)
            statement_3 = "Intermediate time for {} and pred_len {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, pred_len, hours_int, mins_int, secs_int)
            log_file.write(statement_3)
            print(statement_3)

        country_end = time.time()
        hours_c, mins_c, secs_c = running_time(country_start, country_end)
        statement_4 = "Intermediate time for {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, hours_c, mins_c, secs_c)
        log_file.write(statement_4)
        print(statement_4)

    end = time.time()
    hours, mins, secs = running_time(start, end)
    statement_5 = "Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs)
    log_file.write(statement_5)
    print(statement_5)


=== Starting experiments for country: DE ===


=== Starting experiments for pred_len: 24 ===

Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_336_24_DE', model='PatchTST', data='custom', root_path='/vol/fob-vol3/nebenf24/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, scaler_type='minmax', if_relu=True, channel_mixing=1, seq_len=336, label_len=48, pred_len=24, inverse=False, loss_fnc='MAE', fc_dropout=0.2, head_dropout=0.0, patch_len=32, stride=16, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=7, c_out=5, d_model=128, n_heads=16, e_layers=3, d_layers=1, d_ff=256, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=20, batch_size=128, patience=5, learning_rate=0.00

In [8]:
shutil.rmtree("results_transformers") # we do not need this directory and results anymore. If you need - comment this line

path = 'results/patchtst'
patchtst_df = convert_results_into_df(patchtst_results, if_loss_fnc=False)

if not os.path.exists(path):
    os.makedirs(path)

# Final DF
patchtst_df.columns = pd.MultiIndex.from_product([['CM'], ['MSE','RMSE', 'MAE']], names=['Model', 'Metrics'])
patchtst_df.to_csv(os.path.join(path, 'patchtst_channel_mixing.csv'))
patchtst_df.round(4)

Unnamed: 0_level_0,Model,CM,CM,CM
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.0215,0.1467,0.0904
DE,96,0.0394,0.1984,0.1308
DE,168,0.0423,0.2057,0.138
ES,24,0.0151,0.121,0.0721
ES,96,0.0244,0.1551,0.1
ES,168,0.0252,0.158,0.1047
FR,24,0.0107,0.1032,0.0587
FR,96,0.02,0.1414,0.083
FR,168,0.0224,0.1496,0.089
GB,24,0.027,0.1643,0.1042


# 3. No channel independence (channel-mixing) and no ReVIN

In [9]:
# Log file with all the results in 1 file
log_file_path = f"{log_dir}/{model}_channel_mixing.log"

start = time.time()

# Log file
with open(log_file_path, "w") as log_file:

    for i, country in enumerate(countries):
        
        country_start = time.time()
        statement_1 = f"\n=== Starting experiments for country: {country} ===\n"
        log_file.write(statement_1)
        print(statement_1)

        for pred_len in pred_lens:

            pred_len_start = time.time()
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2) 
            model_id = f"{country}_{seq_len}_{pred_len}_{country}"
            dataset = f"{country}_data.csv"
            
            # Arguments for the command
            command = f"""
            python {script_path} \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --pred_len {pred_len} \
              --e_layers {e_layers} \
              --factor 1 \
              --enc_in {num_cols[i]} \
              --c_out {num_cols[i]} \
              --des 'Exp' \
              --train_epochs 20 \
              --patience 5 \
              --n_heads {n_heads} \
              --d_model {d_model} \
              --d_ff {d_ff} \
              --dropout {dropout} \
              --fc_dropout {dropout} \
              --patch_len {patch_len} \
              --stride {stride} \
              --overlapping_windows \
              --scaler_type minmax \
              --if_relu \
              --loss_fnc {loss} \
              --channel_mixing 1 \
              --revin 0 \
              --itr {itr} --batch_size {batch_size} --learning_rate "{lr}"
            """

            # Log the country and prediction length
            log_file.write(f"\n--- Running model for {country}, pred_len={pred_len} ---\n")

            # Run the command and capture the output
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

            # Capture the output in real-time
            output = []
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell
                log_file.write(line)  # Write to the log file

            process.wait()  # Wait for the process to finish
            shutil.rmtree('./checkpoints' )  # delete checkpoint files

            # Extract metrics for each iteration
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, scaled_metrics in enumerate(iteration_metrics, start=1):

                patchtst_results.append({
                    'Country': country,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': scaled_metrics[0],
                    'RMSE': scaled_metrics[1],
                    'MAE': scaled_metrics[2],
                    })
                
            pred_len_end = time.time()
            hours_int, mins_int, secs_int = running_time(pred_len_start, pred_len_end)
            statement_3 = "Intermediate time for {} and pred_len {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, pred_len, hours_int, mins_int, secs_int)
            log_file.write(statement_3)
            print(statement_3)

        country_end = time.time()
        hours_c, mins_c, secs_c = running_time(country_start, country_end)
        statement_4 = "Intermediate time for {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, hours_c, mins_c, secs_c)
        log_file.write(statement_4)
        print(statement_4)

    end = time.time()
    hours, mins, secs = running_time(start, end)
    statement_5 = "Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs)
    log_file.write(statement_5)
    print(statement_5)


=== Starting experiments for country: DE ===


=== Starting experiments for pred_len: 24 ===

Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_336_24_DE', model='PatchTST', data='custom', root_path='/vol/fob-vol3/nebenf24/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, scaler_type='minmax', if_relu=True, channel_mixing=1, seq_len=336, label_len=48, pred_len=24, inverse=False, loss_fnc='MAE', fc_dropout=0.2, head_dropout=0.0, patch_len=32, stride=16, padding_patch='end', revin=0, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=7, c_out=5, d_model=128, n_heads=16, e_layers=3, d_layers=1, d_ff=256, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=20, batch_size=128, patience=5, learning_rate=0.00

In [10]:
shutil.rmtree("results_transformers") # we do not need this directory and results anymore. If you need - comment this line

path = 'results/patchtst'
patchtst_df = convert_results_into_df(patchtst_results, if_loss_fnc=False)

if not os.path.exists(path):
    os.makedirs(path)

# Final DF
patchtst_df.columns = pd.MultiIndex.from_product([['- RevIn & CM'], ['MSE','RMSE', 'MAE']], names=['Model', 'Metrics'])
patchtst_df.to_csv(os.path.join(path, 'patchtst_channel_mixing_no_revin.csv'))
patchtst_df.round(4)

Unnamed: 0_level_0,Model,- RevIn & CM,- RevIn & CM,- RevIn & CM
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.0217,0.1473,0.0911
DE,96,0.0401,0.2002,0.1317
DE,168,0.0432,0.2077,0.1391
ES,24,0.0168,0.1281,0.076
ES,96,0.0263,0.1609,0.1041
ES,168,0.0266,0.1624,0.1084
FR,24,0.0108,0.1041,0.0597
FR,96,0.0203,0.1423,0.0836
FR,168,0.023,0.1516,0.0899
GB,24,0.0275,0.1657,0.1054


# 4. No patching

It runs more than 24 hours on 48GB GPU (1 country around 5-6 hours). Therefore I run it with portions. You can find full results in logs.

In [4]:
patch_len = 1
stride = 1

# Log file with all the results in 1 file
log_file_path = f"{log_dir}/{model}_no_patching.log"

start = time.time()

# Log file
with open(log_file_path, "w") as log_file:

    for i, country in enumerate(countries):
        
        country_start = time.time()
        statement_1 = f"\n=== Starting experiments for country: {country} ===\n"
        log_file.write(statement_1)
        print(statement_1)

        for pred_len in pred_lens:

            pred_len_start = time.time()
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2) 
            model_id = f"{country}_{seq_len}_{pred_len}_{country}"
            dataset = f"{country}_data.csv"
            
            # Arguments for the command
            command = f"""
            python {script_path} \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --pred_len {pred_len} \
              --e_layers {e_layers} \
              --factor 1 \
              --enc_in {num_cols[i]} \
              --c_out {num_cols[i]} \
              --des 'Exp' \
              --train_epochs 20 \
              --patience 5 \
              --n_heads {n_heads} \
              --d_model {d_model} \
              --d_ff {d_ff} \
              --dropout {dropout} \
              --fc_dropout {dropout} \
              --patch_len {patch_len} \
              --stride {stride} \
              --overlapping_windows \
              --scaler_type minmax \
              --if_relu \
              --loss_fnc {loss} \
              --itr {itr} --batch_size {batch_size} --learning_rate "{lr}"
            """

            # Log the country and prediction length
            log_file.write(f"\n--- Running model for {country}, pred_len={pred_len} ---\n")

            # Run the command and capture the output
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

            # Capture the output in real-time
            output = []
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell
                log_file.write(line)  # Write to the log file

            process.wait()  # Wait for the process to finish
            shutil.rmtree('./checkpoints' )  # delete checkpoint files

            # Extract metrics for each iteration
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, scaled_metrics in enumerate(iteration_metrics, start=1):

                patchtst_results.append({
                    'Country': country,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': scaled_metrics[0],
                    'RMSE': scaled_metrics[1],
                    'MAE': scaled_metrics[2],
                    })
                
            pred_len_end = time.time()
            hours_int, mins_int, secs_int = running_time(pred_len_start, pred_len_end)
            statement_3 = "Intermediate time for {} and pred_len {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, pred_len, hours_int, mins_int, secs_int)
            log_file.write(statement_3)
            print(statement_3)

        country_end = time.time()
        hours_c, mins_c, secs_c = running_time(country_start, country_end)
        statement_4 = "Intermediate time for {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, hours_c, mins_c, secs_c)
        log_file.write(statement_4)
        print(statement_4)

    end = time.time()
    hours, mins, secs = running_time(start, end)
    statement_5 = "Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs)
    log_file.write(statement_5)
    print(statement_5)


=== Starting experiments for country: DE ===


=== Starting experiments for pred_len: 24 ===

Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_336_24_DE', model='PatchTST', data='custom', root_path='/vol/fob-vol3/nebenf24/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, scaler_type='minmax', if_relu=True, channel_mixing=0, seq_len=336, label_len=48, pred_len=24, inverse=False, loss_fnc='MAE', fc_dropout=0.2, head_dropout=0.0, patch_len=1, stride=1, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=7, c_out=5, d_model=128, n_heads=16, e_layers=3, d_layers=1, d_ff=256, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=20, batch_size=128, patience=5, learning_rate=0.0001

KeyboardInterrupt: 

In [5]:
shutil.rmtree("results_transformers") # we do not need this directory and results anymore. If you need - comment this line

path = 'results/patchtst'
patchtst_df = convert_results_into_df(patchtst_results, if_loss_fnc=False)

if not os.path.exists(path):
    os.makedirs(path)

# Final DF
patchtst_df.columns = pd.MultiIndex.from_product([['- P'], ['MSE','RMSE', 'MAE']], names=['Model', 'Metrics'])
patchtst_df.to_csv(os.path.join(path, 'patchtst_no_patching.csv'))
patchtst_df.round(4)

Unnamed: 0_level_0,Model,- P,- P,- P
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.0217,0.1472,0.0909
DE,96,0.038,0.1949,0.129
DE,168,0.0402,0.2004,0.1349
ES,24,0.0107,0.1034,0.0632
ES,96,0.0193,0.139,0.0894
ES,168,0.0217,0.1472,0.096
FR,24,0.0108,0.104,0.0585
FR,96,0.0205,0.1432,0.0837
FR,168,0.0216,0.1471,0.0878
GB,24,0.0264,0.1626,0.1037


# 5. TS Decomposition

In [5]:
# Log file with all the results in 1 file
log_file_path = f"{log_dir}/{model}_decomposition.log"

start = time.time()

# Log file
with open(log_file_path, "w") as log_file:

    for i, country in enumerate(countries):
        
        country_start = time.time()
        statement_1 = f"\n=== Starting experiments for country: {country} ===\n"
        log_file.write(statement_1)
        print(statement_1)

        for pred_len in pred_lens:

            pred_len_start = time.time()
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2) 
            model_id = f"{country}_{seq_len}_{pred_len}_{country}"
            dataset = f"{country}_data.csv"
            
            # Arguments for the command
            command = f"""
            python {script_path} \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --pred_len {pred_len} \
              --e_layers {e_layers} \
              --factor 1 \
              --enc_in {num_cols[i]} \
              --c_out {num_cols[i]} \
              --des 'Exp' \
              --train_epochs 20 \
              --patience 5 \
              --n_heads {n_heads} \
              --d_model {d_model} \
              --d_ff {d_ff} \
              --dropout {dropout} \
              --fc_dropout {dropout} \
              --patch_len {patch_len} \
              --stride {stride} \
              --overlapping_windows \
              --scaler_type minmax \
              --if_relu \
              --loss_fnc {loss} \
              --decomposition 1 \
              --itr {itr} --batch_size {batch_size} --learning_rate "{lr}"
            """

            # Log the country and prediction length
            log_file.write(f"\n--- Running model for {country}, pred_len={pred_len} ---\n")

            # Run the command and capture the output
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

            # Capture the output in real-time
            output = []
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell
                log_file.write(line)  # Write to the log file

            process.wait()  # Wait for the process to finish
            shutil.rmtree('./checkpoints' )  # delete checkpoint files

            # Extract metrics for each iteration
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, scaled_metrics in enumerate(iteration_metrics, start=1):

                patchtst_results.append({
                    'Country': country,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': scaled_metrics[0],
                    'RMSE': scaled_metrics[1],
                    'MAE': scaled_metrics[2],
                    })
                
            pred_len_end = time.time()
            hours_int, mins_int, secs_int = running_time(pred_len_start, pred_len_end)
            statement_3 = "Intermediate time for {} and pred_len {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, pred_len, hours_int, mins_int, secs_int)
            log_file.write(statement_3)
            print(statement_3)

        country_end = time.time()
        hours_c, mins_c, secs_c = running_time(country_start, country_end)
        statement_4 = "Intermediate time for {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, hours_c, mins_c, secs_c)
        log_file.write(statement_4)
        print(statement_4)

    end = time.time()
    hours, mins, secs = running_time(start, end)
    statement_5 = "Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs)
    log_file.write(statement_5)
    print(statement_5)


=== Starting experiments for country: DE ===


=== Starting experiments for pred_len: 24 ===

Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_336_24_DE', model='PatchTST', data='custom', root_path='/vol/fob-vol3/nebenf24/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, scaler_type='minmax', if_relu=True, channel_mixing=0, seq_len=336, label_len=48, pred_len=24, inverse=False, loss_fnc='MAE', fc_dropout=0.2, head_dropout=0.0, patch_len=32, stride=16, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=1, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=7, c_out=5, d_model=128, n_heads=16, e_layers=3, d_layers=1, d_ff=256, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=20, batch_size=128, patience=5, learning_rate=0.00

In [6]:
shutil.rmtree("results_transformers") # we do not need this directory and results anymore. If you need - comment this line

path = 'results/patchtst'
patchtst_df = convert_results_into_df(patchtst_results, if_loss_fnc=False)

if not os.path.exists(path):
    os.makedirs(path)

# Final DF
patchtst_df.columns = pd.MultiIndex.from_product([['Decomposition'], ['MSE','RMSE', 'MAE']], names=['Model', 'Metrics'])
patchtst_df.to_csv(os.path.join(path, 'patchtst_decomposition.csv'))
patchtst_df.round(4)

Unnamed: 0_level_0,Model,Decomposition,Decomposition,Decomposition
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.0215,0.1467,0.0896
DE,96,0.0367,0.1917,0.1274
DE,168,0.0392,0.198,0.135
ES,24,0.01,0.1001,0.0607
ES,96,0.0189,0.1374,0.0879
ES,168,0.021,0.1448,0.0947
FR,24,0.0102,0.1009,0.0557
FR,96,0.0195,0.1396,0.0824
FR,168,0.0216,0.1469,0.0885
GB,24,0.0259,0.1611,0.1022


# 6. TS Decomposition + No RevIN

In [7]:
# Log file with all the results in 1 file
log_file_path = f"{log_dir}/{model}_decomposition_no_revin.log"

start = time.time()

# Log file
with open(log_file_path, "w") as log_file:

    for i, country in enumerate(countries):
        
        country_start = time.time()
        statement_1 = f"\n=== Starting experiments for country: {country} ===\n"
        log_file.write(statement_1)
        print(statement_1)

        for pred_len in pred_lens:

            pred_len_start = time.time()
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2) 
            model_id = f"{country}_{seq_len}_{pred_len}_{country}"
            dataset = f"{country}_data.csv"
            
            # Arguments for the command
            command = f"""
            python {script_path} \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --pred_len {pred_len} \
              --e_layers {e_layers} \
              --factor 1 \
              --enc_in {num_cols[i]} \
              --c_out {num_cols[i]} \
              --des 'Exp' \
              --train_epochs 20 \
              --patience 5 \
              --n_heads {n_heads} \
              --d_model {d_model} \
              --d_ff {d_ff} \
              --dropout {dropout} \
              --fc_dropout {dropout} \
              --patch_len {patch_len} \
              --stride {stride} \
              --overlapping_windows \
              --scaler_type minmax \
              --if_relu \
              --loss_fnc {loss} \
              --decomposition 1 \
              --revin 0 \
              --itr {itr} --batch_size {batch_size} --learning_rate "{lr}"
            """

            # Log the country and prediction length
            log_file.write(f"\n--- Running model for {country}, pred_len={pred_len} ---\n")

            # Run the command and capture the output
            process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

            # Capture the output in real-time
            output = []
            for line in process.stdout:
                output.append(line)
                print(line, end='')  # Print in the .ipynb cell
                log_file.write(line)  # Write to the log file

            process.wait()  # Wait for the process to finish
            shutil.rmtree('./checkpoints' )  # delete checkpoint files

            # Extract metrics for each iteration
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, scaled_metrics in enumerate(iteration_metrics, start=1):

                patchtst_results.append({
                    'Country': country,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': scaled_metrics[0],
                    'RMSE': scaled_metrics[1],
                    'MAE': scaled_metrics[2],
                    })
                
            pred_len_end = time.time()
            hours_int, mins_int, secs_int = running_time(pred_len_start, pred_len_end)
            statement_3 = "Intermediate time for {} and pred_len {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, pred_len, hours_int, mins_int, secs_int)
            log_file.write(statement_3)
            print(statement_3)

        country_end = time.time()
        hours_c, mins_c, secs_c = running_time(country_start, country_end)
        statement_4 = "Intermediate time for {}: {:0>2}h:{:0>2}m:{:05.2f}s".format(country, hours_c, mins_c, secs_c)
        log_file.write(statement_4)
        print(statement_4)

    end = time.time()
    hours, mins, secs = running_time(start, end)
    statement_5 = "Total time: {:0>2}h:{:0>2}m:{:05.2f}s".format(hours, mins, secs)
    log_file.write(statement_5)
    print(statement_5)


=== Starting experiments for country: DE ===


=== Starting experiments for pred_len: 24 ===



Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_336_24_DE', model='PatchTST', data='custom', root_path='/vol/fob-vol3/nebenf24/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, scaler_type='minmax', if_relu=True, channel_mixing=0, seq_len=336, label_len=48, pred_len=24, inverse=False, loss_fnc='MAE', fc_dropout=0.2, head_dropout=0.0, patch_len=32, stride=16, padding_patch='end', revin=0, affine=0, subtract_last=0, decomposition=1, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=7, c_out=5, d_model=128, n_heads=16, e_layers=3, d_layers=1, d_ff=256, moving_avg=25, factor=1, distil=True, dropout=0.2, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=20, batch_size=128, patience=5, learning_rate=0.0001, des='Exp', lradj='type3', pct_start=0.3, use_amp=False, use_gpu=True, gpu=0, use_multi_gpu=

In [8]:
shutil.rmtree("results_transformers") # we do not need this directory and results anymore. If you need - comment this line

path = 'results/patchtst'
patchtst_df = convert_results_into_df(patchtst_results, if_loss_fnc=False)

if not os.path.exists(path):
    os.makedirs(path)

# Final DF
patchtst_df.columns = pd.MultiIndex.from_product([['-RevIN + Decomposition '], ['MSE','RMSE', 'MAE']], names=['Model', 'Metrics'])
patchtst_df.to_csv(os.path.join(path, 'patchtst_decomposition_no_revin.csv'))
patchtst_df.round(4)

Unnamed: 0_level_0,Model,-RevIN + Decomposition,-RevIN + Decomposition,-RevIN + Decomposition
Unnamed: 0_level_1,Metrics,MSE,RMSE,MAE
Country,Pred_len,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
DE,24,0.0222,0.149,0.0929
DE,96,0.0401,0.2001,0.1329
DE,168,0.0434,0.208,0.1403
ES,24,0.0208,0.1389,0.0807
ES,96,0.0326,0.1762,0.1083
ES,168,0.0379,0.1894,0.118
FR,24,0.0121,0.1098,0.0626
FR,96,0.0237,0.1533,0.0901
FR,168,0.0267,0.1628,0.0972
GB,24,0.0268,0.1637,0.1058
