Here we run a check on the DE and FR datasets to confirm the choice of loss function for our data.

In [1]:
import os
import re
import pandas as pd
import numpy as np
import subprocess
#from utils.helper import extract_metrics_from_output

# Test for Informer

In [2]:
# Paths to the data directory and to the training entry point.
# data_path keeps its trailing slash because it is passed verbatim as --root_path.
data_path = os.getcwd() + "/datasets/"
script_path = "./PatchTST-main/PatchTST_supervised/run_longExp.py"

# Experiment arguments; they are also used to build model ids and file names.
model = "Informer"
dataset = 'DE_data.csv'
losses = ["MSE", "RMSE", "MAE"]
# Country code = first two characters of the dataset file name ("DE_data.csv" -> "DE").
country = dataset[:2]

# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of os.path.exists() followed by os.makedirs().
log_dir = "logs/loss_choice"
os.makedirs(log_dir, exist_ok=True)

# One log file per (model, country) pair.
log_file_path = f"{log_dir}/{model}_{country}.log"

In [5]:
# Dynamic variables
# NOTE(review): this cell calls extract_metrics_from_output(output) with a single
# argument and unpacks four values from it, but the import of that helper in the
# first cell is commented out, and the definitions further down in this notebook
# take (output, itr) and return a list of tuples. Presumably this is an earlier
# draft that relied on utils.helper -- confirm before re-running on a fresh kernel.
pred_lens = ["24", "96", "168"]
seq_len = "96"
lr = "0.0001"

# Collects one record per (loss function, prediction length).
informer_results = []

# The log file is opened in "w" mode, so every run of this cell overwrites it.
with open(log_file_path, "w") as log_file:
    for loss in losses:

      log_file.write(f"\n=== Starting experiments for loss function: {loss} ===\n")
      print(f"\n=== Starting experiments for loss function: {loss} ===")  # Print to notebook

      for pred_len in pred_lens:
        log_file.write(f"\n=== Starting experiments for pred_len: {pred_len} ===\n")
        print(f"\n=== Starting experiments for pred_len: {pred_len} ===")
        model_id = f"{country}_{seq_len}_{pred_len}_loss_choice_for_{country}"

        # Arguments for the command
        # The trailing backslashes sit inside a non-raw string literal, so each
        # backslash-newline pair is dropped and the command reaches the shell as
        # one line. --itr is hard-coded to 2 here.
        command = f"""
        python {script_path} \
          --random_seed 2021 \
          --is_training 1 \
          --root_path "{data_path}" \
          --data_path "{dataset}" \
          --model_id {model_id} \
          --model "{model}" \
          --data "custom" \
          --features M \
          --seq_len {seq_len} \
          --label_len 5 \
          --pred_len {pred_len} \
          --e_layers 2 \
          --d_layers 1 \
          --factor 5 \
          --enc_in 5 \
          --dec_in 5 \
          --c_out 5 \
          --des 'Exp' \
          --train_epochs 10 \
          --patience 3 \
          --overlapping_windows \
          --loss_fnc "{loss}" \
          --itr 2 --batch_size 32 --learning_rate "{lr}"
        """

        # Run the command and capture the output
        # stderr is merged into stdout, so error output ends up in the same stream.
        process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

        # Capture the output in real-time
        output = []
        for line in process.stdout:
            output.append(line)
            print(line, end='')  # Print in the .ipynb cell
            log_file.write(line)  # Write to the log file

        # Wait for the process to complete
        # NOTE(review): the exit status is not inspected afterwards.
        process.wait()

        # Extract metrics from the captured output
        # NOTE(review): only one (mse, rmse, mae, rse) tuple is kept per run although
        # the command requests --itr 2 -- confirm which iteration the helper returned.
        mse, rmse, mae, rse = extract_metrics_from_output(output)

        # Log the extracted metrics
        log_file.write(f"\nExtracted Metrics for {country}, pred_len={pred_len}:\n")
        log_file.write(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, RSE: {rse}\n")

        # Append the results to the informer_results list
        informer_results.append({
            'Loss_function': loss,
            'Pred_len': pred_len,
            'MSE': mse,
            'RMSE': rmse,
            'MAE': mae,
            'RSE': rse
        })


=== Starting experiments for loss function: MSE ===
Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_96_24_loss_choice_for_DE', model='Informer', data='custom', root_path='/vol/cs-hu/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, seq_len=96, label_len=5, pred_len=24, inverse=False, loss_fnc='MSE', fc_dropout=0.05, head_dropout=0.0, patch_len=16, stride=8, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=5, c_out=5, d_model=512, n_heads=8, e_layers=2, d_layers=1, d_ff=2048, moving_avg=25, factor=5, distil=True, dropout=0.05, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', loss='mse', lradj='type3', pct_start=0.3, use_amp=False, use_gpu=True, gpu=0, us

In [3]:
import subprocess
import re

# Sweep settings. Horizons, window length and learning rate are stored as
# strings; they are interpolated into the command line of the training script.
seq_len, lr = "96", "0.0001"
pred_lens = [str(horizon) for horizon in (24, 96, 168)]
itr = 2  # value passed to the script via --itr

# Set before any training subprocess is launched; children inherit os.environ.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

# Filled by the sweep below with one record per (loss, pred_len, iteration).
informer_results = list()

# Define a function to extract metrics for both iterations
def extract_metrics_from_output(output, itr):
    """Parse per-iteration test metrics out of captured training output.

    Args:
        output: Either the complete output as one string, or an iterable of
            output lines (with or without trailing newlines).
        itr: Number of iterations whose metrics are expected.

    Returns:
        A list of ``itr`` tuples ``(mse, rmse, mae, rse)`` of floats, in the
        order the metric lines appear in the output.

    Raises:
        ValueError: If fewer than ``itr`` metric lines are found.
    """
    # Besides plain decimals, accept a sign, an exponent and nan/inf, so that
    # values such as 5e-05 are parsed instead of making the line unmatched.
    number = r"([-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf))"
    pattern = re.compile(
        rf"mse:\s*{number},\s*rmse:\s*{number},\s*mae:\s*{number},\s*rse:\s*{number}",
        re.IGNORECASE,
    )

    # A str must not go through join(): that would put "\n" between every
    # character and no metric line could ever match.
    output_str = output if isinstance(output, str) else "\n".join(output)

    # Each match is a 4-tuple of the captured number strings.
    matches = pattern.findall(output_str)

    # Ensure we have enough matches for the number of iterations requested
    if len(matches) < itr:
        raise ValueError(f"Expected at least {itr} iterations, but found only {len(matches)}.")

    # Keep only the first `itr` matches, converted to floats.
    return [tuple(map(float, match)) for match in matches[:itr]]


# Sweep every loss function over every forecast horizon. All child output is
# echoed to the notebook and mirrored into the log file, which is opened in
# "w" mode and therefore overwritten on each run of this cell.
with open(log_file_path, "w") as log_file:
    for loss in losses:

        log_file.write(f"\n=== Starting experiments for loss function: {loss} ===\n")
        print(f"\n=== Starting experiments for loss function: {loss} ===")  # Print to notebook

        for pred_len in pred_lens:
            log_file.write(f"\n=== Starting experiments for pred_len: {pred_len} ===\n")
            print(f"\n=== Starting experiments for pred_len: {pred_len} ===")
            model_id = f"{country}_{seq_len}_{pred_len}_loss_choice_for_{country}"

            # Run command with --itr {itr} so the iterations are handled internally
            # by the script (one child process per loss/horizon pair).
            command = f"""
            python {script_path} \
              --random_seed 2021 \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --label_len 5 \
              --pred_len {pred_len} \
              --e_layers 2 \
              --d_layers 1 \
              --factor 5 \
              --enc_in 5 \
              --dec_in 5 \
              --c_out 5 \
              --des 'Exp' \
              --train_epochs 10 \
              --patience 3 \
              --overlapping_windows \
              --loss_fnc "{loss}" \
              --itr {itr} --batch_size 32 --learning_rate "{lr}"
            """

            # stderr is merged into stdout so error output lands in the same log.
            # The context manager closes the pipe and waits for the child, even
            # if the cell is interrupted while streaming.
            output = []
            with subprocess.Popen(
                command,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            ) as process:
                # Capture the output in real-time
                for line in process.stdout:
                    output.append(line)
                    print(line, end='')  # Print in the .ipynb cell
                    log_file.write(line)  # Write to the log file

            # Report a failed run with its exit status instead of a misleading
            # "found only 0 iterations" parse error further down.
            if process.returncode != 0:
                raise RuntimeError(
                    f"{script_path} exited with status {process.returncode} "
                    f"(loss={loss}, pred_len={pred_len}); see {log_file_path}"
                )

            # Pass the list of lines: the helper joins them itself, so handing it
            # an already-joined string would break its regex matching.
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, metrics in enumerate(iteration_metrics, start=1):
                # The helper returns plain (mse, rmse, mae, rse) tuples, not dicts.
                mse, rmse, mae, rse = metrics
                log_file.write(f"\nExtracted Metrics for {country}, pred_len={pred_len}, iteration={iteration}:\n")
                log_file.write(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, RSE: {rse}\n")

                # Append the results to the informer_results list
                informer_results.append({
                    'Loss_function': loss,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': mse,
                    'RMSE': rmse,
                    'MAE': mae,
                    'RSE': rse
                })


=== Starting experiments for loss function: MSE ===

=== Starting experiments for pred_len: 24 ===
Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_96_24_loss_choice_for_DE', model='Informer', data='custom', root_path='/vol/cs-hu/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, seq_len=96, label_len=5, pred_len=24, inverse=False, loss_fnc='MSE', fc_dropout=0.05, head_dropout=0.0, patch_len=16, stride=8, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=5, c_out=5, d_model=512, n_heads=8, e_layers=2, d_layers=1, d_ff=2048, moving_avg=25, factor=5, distil=True, dropout=0.05, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', lradj='type3', pct_start=0.3, use

KeyboardInterrupt: 

In [3]:
# Dynamic variables for the Informer sweep (all but `itr` are strings that are
# interpolated into the command line).
lr = "0.0001"
seq_len = "96"
pred_lens = "24 96 168".split()
itr = 2  # value passed to the script via --itr

# Must be set before the training subprocesses start; they inherit os.environ.
os.environ.update(CUDA_VISIBLE_DEVICES="0")

# One record per (loss, pred_len, iteration) is appended by the sweep below.
informer_results = list()
# Define a function to extract metrics for both iterations
def extract_metrics_from_output(output, itr):
    """Parse per-iteration test metrics out of captured training output.

    Args:
        output: Either the complete output as one string, or an iterable of
            output lines (with or without trailing newlines).
        itr: Number of iterations whose metrics are expected.

    Returns:
        A list of ``itr`` tuples ``(mse, rmse, mae, rse)`` of floats, in the
        order the metric lines appear in the output.

    Raises:
        ValueError: If fewer than ``itr`` metric lines are found.
    """
    # Besides plain decimals, accept a sign, an exponent and nan/inf, so that
    # values such as 5e-05 are parsed instead of making the line unmatched.
    number = r"([-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf))"
    pattern = re.compile(
        rf"mse:\s*{number},\s*rmse:\s*{number},\s*mae:\s*{number},\s*rse:\s*{number}",
        re.IGNORECASE,
    )

    # A str must not go through join(): that would put "\n" between every
    # character and no metric line could ever match.
    output_str = output if isinstance(output, str) else "\n".join(output)

    # Each match is a 4-tuple of the captured number strings.
    matches = pattern.findall(output_str)

    # Ensure we have enough matches for the number of iterations requested
    if len(matches) < itr:
        raise ValueError(f"Expected at least {itr} iterations, but found only {len(matches)}.")

    # Keep only the first `itr` matches, converted to floats.
    return [tuple(map(float, match)) for match in matches[:itr]]

# Sweep every loss function over every forecast horizon. All child output is
# echoed to the notebook and mirrored into the log file, which is opened in
# "w" mode and therefore overwritten on each run of this cell.
with open(log_file_path, "w") as log_file:
    for loss in losses:
        statement_1 = f"\n=== Starting experiments for loss function: {loss} ===\n"
        log_file.write(statement_1)
        print(statement_1)  # Print to notebook

        for pred_len in pred_lens:
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2)
            model_id = f"{country}_{seq_len}_{pred_len}_loss_choice_for_{country}"

            # Run command with --itr {itr} so the iterations are handled internally
            # by the script (one child process per loss/horizon pair).
            command = f"""
            python {script_path} \
              --random_seed 2021 \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --label_len 5 \
              --pred_len {pred_len} \
              --e_layers 2 \
              --d_layers 1 \
              --factor 5 \
              --enc_in 5 \
              --dec_in 5 \
              --c_out 5 \
              --des 'Exp' \
              --train_epochs 10 \
              --patience 3 \
              --overlapping_windows \
              --loss_fnc "{loss}" \
              --itr {itr} --batch_size 32 --learning_rate "{lr}"
            """

            # stderr is merged into stdout so error output lands in the same log.
            # The context manager closes the pipe and waits for the child, even
            # if the cell is interrupted while streaming.
            output = []
            with subprocess.Popen(
                command,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            ) as process:
                # Capture the output in real-time
                for line in process.stdout:
                    output.append(line)
                    print(line, end='')  # Print in the .ipynb cell
                    log_file.write(line)  # Write to the log file

            # Report a failed run with its exit status instead of a misleading
            # "found only 0 iterations" parse error further down.
            if process.returncode != 0:
                raise RuntimeError(
                    f"{script_path} exited with status {process.returncode} "
                    f"(loss={loss}, pred_len={pred_len}); see {log_file_path}"
                )

            # Extract metrics for each iteration from the captured lines
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, metrics in enumerate(iteration_metrics, start=1):
                mse, rmse, mae, rse = metrics
                log_file.write(f"\nExtracted Metrics for {country}, pred_len={pred_len}, iteration={iteration}:\n")
                log_file.write(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, RSE: {rse}\n")

                # Append the results to the informer_results list
                informer_results.append({
                    'Loss_function': loss,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': mse,
                    'RMSE': rmse,
                    'MAE': mae,
                    'RSE': rse
                })



=== Starting experiments for loss function: MSE ===

=== Starting experiments for pred_len: 24 ===
Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_96_24_loss_choice_for_DE', model='Informer', data='custom', root_path='/vol/cs-hu/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, seq_len=96, label_len=5, pred_len=24, inverse=False, loss_fnc='MSE', fc_dropout=0.05, head_dropout=0.0, patch_len=16, stride=8, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=5, c_out=5, d_model=512, n_heads=8, e_layers=2, d_layers=1, d_ff=2048, moving_avg=25, factor=5, distil=True, dropout=0.05, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', lradj='type3', pct_start=0.3, use

In [6]:
# Build the results table in one expression: one row per record, indexed by
# (loss function, iteration, prediction length). Chaining set_index avoids
# mutating the frame in place.
informer_df = pd.DataFrame(informer_results).set_index(
    ['Loss_function', 'Iteration', 'Pred_len']
)

# Persist the unrounded values. exist_ok=True makes the cell safe to re-run and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
path_dir = './dataset_results'
os.makedirs(path_dir, exist_ok=True)
informer_df.to_csv(f'{path_dir}/informer_loss_functions_results.csv', index=True)

# Rounding is for display only; informer_df itself keeps full precision.
informer_df.round(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,MSE,RMSE,MAE,RSE
Loss_function,Iteration,Pred_len,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MSE,1,24,0.4846,0.6961,0.4748,0.5509
MSE,2,24,0.4871,0.6979,0.4672,0.5524
MSE,1,96,0.8256,0.9086,0.669,0.7206
MSE,2,96,0.8442,0.9188,0.6719,0.7287
MSE,1,168,0.8689,0.9321,0.6895,0.7384
MSE,2,168,0.9266,0.9626,0.7018,0.7626
RMSE,1,24,0.5057,0.7111,0.4947,0.5628
RMSE,2,24,0.4891,0.6993,0.4771,0.5535
RMSE,1,96,0.8275,0.9097,0.672,0.7215
RMSE,2,96,0.826,0.9089,0.6638,0.7208


In [7]:
# Bare last expression: renders the Informer results frame without rounding.
informer_df

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE,RMSE,MAE,RSE
Loss_function,Pred_len,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MSE,24,0.5078,0.7126,0.4996,1.4263
MSE,96,0.8278,0.9098,0.6768,1.3444
MSE,168,0.925,0.9618,0.7023,1.3695
RMSE,24,0.5071,0.7121,0.4983,1.4291
RMSE,96,0.8019,0.8955,0.6663,1.344
RMSE,168,0.8935,0.9452,0.6883,1.3732
MAE,24,0.4949,0.7035,0.4628,1.5201
MAE,96,0.952,0.9757,0.6667,1.4635
MAE,168,0.9182,0.9582,0.6792,1.4107


# Test for PatchTST

In [7]:
# Dynamic variables for the PatchTST sweep.
# NOTE: this rebinds the notebook-level `model`, `seq_len` and `log_file_path`,
# so cells executed afterwards see the PatchTST values.
pred_lens = ["24", "96", "168"]
seq_len = "512"
lr = "0.0001"
model = "PatchTST"
itr = 2  # value passed to the script via --itr
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

log_file_path = f"{log_dir}/{model}_{country}.log"

# One record per (loss, pred_len, iteration) is appended by the sweep below.
patchtst_results = []

# Sweep every loss function over every forecast horizon. All child output is
# echoed to the notebook and mirrored into the log file, which is opened in
# "w" mode and therefore overwritten on each run of this cell.
with open(log_file_path, "w") as log_file:
    for loss in losses:
        statement_1 = f"\n=== Starting experiments for loss function: {loss} ===\n"
        log_file.write(statement_1)
        print(statement_1)  # Print to notebook

        for pred_len in pred_lens:
            statement_2 = f"\n=== Starting experiments for pred_len: {pred_len} ===\n"
            log_file.write(statement_2)
            print(statement_2)
            model_id = f"{country}_{seq_len}_{pred_len}_loss_choice_for_{country}"

            # Run command with --itr {itr} so the iterations are handled internally
            # by the script (one child process per loss/horizon pair).
            command = f"""
            python {script_path} \
              --random_seed 2021 \
              --is_training 1 \
              --root_path "{data_path}" \
              --data_path "{dataset}" \
              --model_id {model_id} \
              --model "{model}" \
              --data "custom" \
              --features M \
              --seq_len {seq_len} \
              --label_len 5 \
              --pred_len {pred_len} \
              --e_layers 2 \
              --d_layers 1 \
              --factor 5 \
              --enc_in 5 \
              --dec_in 5 \
              --c_out 5 \
              --des 'Exp' \
              --train_epochs 10 \
              --patience 3 \
              --overlapping_windows \
              --loss_fnc "{loss}" \
              --itr {itr} --batch_size 32 --learning_rate "{lr}"
            """

            # stderr is merged into stdout so error output lands in the same log.
            # The context manager closes the pipe and waits for the child, even
            # if the cell is interrupted while streaming.
            output = []
            with subprocess.Popen(
                command,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            ) as process:
                # Capture the output in real-time
                for line in process.stdout:
                    output.append(line)
                    print(line, end='')  # Print in the .ipynb cell
                    log_file.write(line)  # Write to the log file

            # Report a failed run with its exit status instead of a misleading
            # "found only 0 iterations" parse error further down.
            if process.returncode != 0:
                raise RuntimeError(
                    f"{script_path} exited with status {process.returncode} "
                    f"(loss={loss}, pred_len={pred_len}); see {log_file_path}"
                )

            # Extract metrics for each iteration from the captured lines
            iteration_metrics = extract_metrics_from_output(output, itr)

            # Log the extracted metrics and save them
            for iteration, metrics in enumerate(iteration_metrics, start=1):
                mse, rmse, mae, rse = metrics
                log_file.write(f"\nExtracted Metrics for {country}, pred_len={pred_len}, iteration={iteration}:\n")
                log_file.write(f"MSE: {mse}, RMSE: {rmse}, MAE: {mae}, RSE: {rse}\n")

                # Append the results to the patchtst_results list
                patchtst_results.append({
                    'Loss_function': loss,
                    'Pred_len': pred_len,
                    'Iteration': iteration,
                    'MSE': mse,
                    'RMSE': rmse,
                    'MAE': mae,
                    'RSE': rse
                })



=== Starting experiments for loss function: MSE ===


=== Starting experiments for pred_len: 24 ===

Args in experiment:
Namespace(random_seed=2021, is_training=1, model_id='DE_512_24_loss_choice_for_DE', model='PatchTST', data='custom', root_path='/vol/cs-hu/riabchuv/my_work/datasets/', data_path='DE_data.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', overlapping_windows=True, seq_len=512, label_len=5, pred_len=24, inverse=False, loss_fnc='MSE', fc_dropout=0.05, head_dropout=0.0, patch_len=16, stride=8, padding_patch='end', revin=1, affine=0, subtract_last=0, decomposition=0, kernel_size=25, individual=0, embed_type=0, enc_in=5, dec_in=5, c_out=5, d_model=512, n_heads=8, e_layers=2, d_layers=1, d_ff=2048, moving_avg=25, factor=5, distil=True, dropout=0.05, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=2, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', lradj='type3', pct_start=0.3,

In [8]:
# Build the results table in one expression: one row per record, indexed by
# (loss function, iteration, prediction length). Chaining set_index avoids
# mutating the frame in place.
patchtst_df = pd.DataFrame(patchtst_results).set_index(
    ['Loss_function', 'Iteration', 'Pred_len']
)

# Persist the unrounded values. exist_ok=True makes the cell safe to re-run and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
path_dir = './dataset_results'
os.makedirs(path_dir, exist_ok=True)
patchtst_df.to_csv(f'{path_dir}/patchtst_loss_functions_results.csv', index=True)

# Rounding is for display only; patchtst_df itself keeps full precision.
patchtst_df.round(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,MSE,RMSE,MAE,RSE
Loss_function,Iteration,Pred_len,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MSE,1,24,0.4484,0.6696,0.4417,0.5299
MSE,2,24,0.4501,0.6709,0.4477,0.5309
MSE,1,96,0.7422,0.8615,0.6124,0.6833
MSE,2,96,0.7766,0.8812,0.624,0.6989
MSE,1,168,0.8117,0.901,0.647,0.7137
MSE,2,168,0.8048,0.8971,0.6461,0.7107
RMSE,1,24,0.4476,0.6691,0.4402,0.5295
RMSE,2,24,0.4471,0.6686,0.4456,0.5292
RMSE,1,96,0.7429,0.8619,0.6125,0.6836
RMSE,2,96,0.7808,0.8836,0.6239,0.7008
