# <center>Visualizing Mean Square Errors of Transformers</center>

In [None]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Plot output settings shared by every figure cell below.
SAVE = True        # also write each figure to SAVEDIR, not just display it
SAVEDIR = 'img'
BAR_WIDTH = 0.15   # width of a single bar inside a grouped bar chart

if SAVE:
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(SAVEDIR, exist_ok=True)

# Matches a log row that reports a test result and captures the loss value.
# Raw string avoids invalid-escape warnings; `\.` only accepts a literal decimal point
# (an unescaped `.` would accept any character between the two digit groups).
loss_ptn = re.compile(r'.*\[TEST\]\ttest_loss: (\d+\.\d+)')

## Synthetic Datasets

In [None]:
# Directory holding one sub-directory (with an exp.log) per synthetic dataset.
# Defaults to the original location; set LSTF_CKPT_ROOT to point at another checkpoint root.
CKPTDIR = os.path.join(os.environ.get('LSTF_CKPT_ROOT', '/usr2/home/yongyiw/ckpt/lstf'), 'Synthetic')
# Only log entries whose [CONFIG] row contains `_dm{D_MODEL}_dff{D_FF}_` are read below.
D_MODEL = 512
D_FF = 2048

Let $\mathcal{N} \sim N(0, 1)$. 

+ `sinx`: $y = 10\sin x + \mathcal{N}$
+ `x`: $y = x + \mathcal{N}$
+ `sinx_x`: $y = 10\sin x + x + \mathcal{N}$
+ `sinx_sqrtx`: $y = 10\sin x + 20\sqrt{x - \min(x)} + \mathcal{N}$
+ `sinx_x2_sym`: $y = 10\sin x + (\frac{x}{50})^2 + \mathcal{N}$
+ `sinx_x2_asym`: $y = 10\sin x + (\frac{x - \min(x)}{30})^2 + \mathcal{N}$
+ `xsinx`: $y = e^{x \mod 4} \cdot (10\sin x + \mathcal{N})$
+ `sinx_sin2x_sin4x`: $y = 10(\sin x + \sin 2x + \sin 4x) + \mathcal{N}$
+ `sinx_c`: $y = 10\sin x + (-1)^{\mathbb{I}[x \mod 16 < 8]} 30 + \mathcal{N}$

### Everything Together

#### With Trend

In [None]:
# Collect test MSEs for the 2x2 ablation (attention type x decomposition window)
# on the datasets that contain a trend component.
datasets = ['x', 'sinx_x', 'sinx_sqrtx', 'sinx_x2_sym', 'sinx_x2_asym']

mses = []
for dataset in datasets:
    # Read the log once per dataset; the context manager closes the file handle.
    with open(os.path.join(CKPTDIR, dataset, 'exp.log')) as log_file:
        logs = log_file.readlines()
    mses.append([])
    for attn in ['autocorrelation', 'dot']:
        mses[-1].append([])
        for lwin in [25, 0]:
            losses = []
            config_ptn = re.compile(
                r'.*\[CONFIG\]\t.*_attn{}_.*_dm{}_dff{}_.*_lw{}_'.format(attn, D_MODEL, D_FF, lwin)
            )
            # True between a matching [CONFIG] row and the [TEST] row that reports its loss.
            awaiting_loss = False
            for row in logs:
                if config_ptn.match(row) is not None:
                    # A second matching config before any test result breaks the expected
                    # CONFIG-then-TEST ordering of the log.
                    assert not awaiting_loss, 'matching [CONFIG] row without a following [TEST] row'
                    awaiting_loss = True
                loss_m = loss_ptn.match(row)
                if loss_m is not None and awaiting_loss:
                    losses.append(float(loss_m[1]))
                    awaiting_loss = False
            mses[-1][-1].append(losses)
# Average over the losses collected for each configuration.
mses = np.array(mses).mean(-1)
mses.shape # dataset, auto/attn, decomp/norm

In [None]:
xticks = np.arange(len(datasets))

# (offset in units of BAR_WIDTH, attention index, decomposition index, legend label).
# `mses` is indexed as [dataset, auto/attn, decomp/norm].
bars = [
    (-1.5, 1, 1, 'Transformer'),
    (-0.5, 0, 1, '+ AutoCorrelation'),
    (0.5, 1, 0, '+ Decomposition'),
    (1.5, 0, 0, '+ AutoCorrelation + Decomposition'),
]

plt.figure(figsize=(12, 6))
for shift, attn_idx, decomp_idx, label in bars:
    plt.bar(xticks + shift * BAR_WIDTH, mses[:, attn_idx, decomp_idx], width=BAR_WIDTH, label=label)
plt.xticks(xticks, labels=datasets)
plt.title('Trended Datasets (Univariate)')
# The x axis enumerates datasets (see the tick labels), not prediction lengths.
plt.xlabel('Dataset')
plt.ylabel('Testing MSE')
plt.legend(loc='upper left')
if SAVE:
    plt.savefig(os.path.join(SAVEDIR, 'mse_trended.png'))
plt.show()

#### Without Trend

In [None]:
# Collect test MSEs for the 2x2 ablation (attention type x decomposition window)
# on the datasets without a trend component.
datasets = ['sinx', 'xsinx', 'sinx_sin2x_sin4x', 'sinx_c']

mses = []
for dataset in datasets:
    # Read the log once per dataset; the context manager closes the file handle.
    with open(os.path.join(CKPTDIR, dataset, 'exp.log')) as log_file:
        logs = log_file.readlines()
    mses.append([])
    for attn in ['autocorrelation', 'dot']:
        mses[-1].append([])
        for lwin in [25, 0]:
            losses = []
            config_ptn = re.compile(
                r'.*\[CONFIG\]\t.*_attn{}_.*_dm{}_dff{}_.*_lw{}_'.format(attn, D_MODEL, D_FF, lwin)
            )
            # True between a matching [CONFIG] row and the [TEST] row that reports its loss.
            awaiting_loss = False
            for row in logs:
                if config_ptn.match(row) is not None:
                    # A second matching config before any test result breaks the expected
                    # CONFIG-then-TEST ordering of the log.
                    assert not awaiting_loss, 'matching [CONFIG] row without a following [TEST] row'
                    awaiting_loss = True
                loss_m = loss_ptn.match(row)
                if loss_m is not None and awaiting_loss:
                    losses.append(float(loss_m[1]))
                    awaiting_loss = False
            mses[-1][-1].append(losses)
# Average over the losses collected for each configuration.
mses = np.array(mses).mean(-1)
mses.shape # dataset, auto/attn, decomp/norm

In [None]:
xticks = np.arange(len(datasets))

# (offset in units of BAR_WIDTH, attention index, decomposition index, legend label).
# `mses` is indexed as [dataset, auto/attn, decomp/norm].
bars = [
    (-1.5, 1, 1, 'Transformer'),
    (-0.5, 0, 1, '+ AutoCorrelation'),
    (0.5, 1, 0, '+ Decomposition'),
    (1.5, 0, 0, '+ AutoCorrelation + Decomposition'),
]

plt.figure(figsize=(12, 6))
for shift, attn_idx, decomp_idx, label in bars:
    plt.bar(xticks + shift * BAR_WIDTH, mses[:, attn_idx, decomp_idx], width=BAR_WIDTH, label=label)
plt.xticks(xticks, labels=datasets)
plt.title('Untrended Datasets (Univariate)')
# The x axis enumerates datasets (see the tick labels), not prediction lengths.
plt.xlabel('Dataset')
plt.ylabel('Testing MSE')
plt.legend(loc='upper left')
if SAVE:
    plt.savefig(os.path.join(SAVEDIR, 'mse_untrended.png'))
plt.show()

### Decomposition Block

#### With Trend

In [None]:
# Collect test MSEs of the autocorrelation model for every decomposition window length
# on the datasets that contain a trend component.
datasets = ['x', 'sinx_x', 'sinx_sqrtx', 'sinx_x2_sym', 'sinx_x2_asym']
len_windows = [0, 5, 13, 25, 51]
# Window length -> position along the second axis of `mses`.
mapping = {lw: i for i, lw in enumerate(len_windows)}

# The window length is captured from the config name rather than formatted in, so the
# pattern depends only on the model sizes and is compiled once.
config_ptn = re.compile(
    r'.*\[CONFIG\]\t.*_attnautocorrelation_.*_dm{}_dff{}_.*_lw(\d+)_'.format(D_MODEL, D_FF)
)

mses = []
for dataset in datasets:
    # The context manager closes the log file as soon as it has been read.
    with open(os.path.join(CKPTDIR, dataset, 'exp.log')) as log_file:
        logs = log_file.readlines()
    per_window = [[] for _ in len_windows]
    # Slot of the most recent matching [CONFIG] row that is still waiting for its [TEST] row.
    index = None
    for row in logs:
        config_m = config_ptn.match(row)
        if config_m is not None:
            assert index is None, 'matching [CONFIG] row without a following [TEST] row'
            # Raises KeyError for a window length that is not listed in len_windows.
            index = mapping[int(config_m[1])]
        loss_m = loss_ptn.match(row)
        if loss_m is not None and index is not None:
            per_window[index].append(float(loss_m[1]))
            index = None
    mses.append(per_window)
# Average over the losses collected for each window length.
mses = np.array(mses).mean(-1)
mses.shape # dataset, len_window

In [None]:
# Grouped bar chart: one group per dataset, one bar per decomposition window length.
xticks = np.arange(len(datasets))

plt.figure(figsize=(12, 6))
for j in range(len(len_windows)):
    lw = len_windows[j]
    # A window length of 0 gets the plain autocorrelation label.
    label = '+ AutoCorrelation' if lw == 0 else '+ AutoCorrelation + Decomposition: {}'.format(lw)
    # (j - 2) centres a group of five bars on its tick.
    plt.bar(xticks + (j - 2) * BAR_WIDTH, mses[:, j], width=BAR_WIDTH, label=label)

plt.title('LayerNorm vs Decomposition on Trended Datasets (Univariate)')
plt.xlabel('Dataset')
plt.ylabel('Testing MSE')
plt.xticks(xticks, labels=datasets)
plt.ylim(0, 0.5)
plt.legend(loc='upper left')
if SAVE:
    out_path = os.path.join(SAVEDIR, 'mse_trended_decomp.png')
    plt.savefig(out_path)
plt.show()

#### Without Trend

In [None]:
# Collect test MSEs of the autocorrelation model for every decomposition window length
# on the datasets without a trend component.
datasets = ['sinx', 'xsinx', 'sinx_sin2x_sin4x', 'sinx_c']
len_windows = [0, 5, 13, 25, 51]
# Window length -> position along the second axis of `mses`.
mapping = {lw: i for i, lw in enumerate(len_windows)}

# The window length is captured from the config name rather than formatted in, so the
# pattern depends only on the model sizes and is compiled once.
config_ptn = re.compile(
    r'.*\[CONFIG\]\t.*_attnautocorrelation_.*_dm{}_dff{}_.*_lw(\d+)_'.format(D_MODEL, D_FF)
)

mses = []
for dataset in datasets:
    # The context manager closes the log file as soon as it has been read.
    with open(os.path.join(CKPTDIR, dataset, 'exp.log')) as log_file:
        logs = log_file.readlines()
    per_window = [[] for _ in len_windows]
    # Slot of the most recent matching [CONFIG] row that is still waiting for its [TEST] row.
    index = None
    for row in logs:
        config_m = config_ptn.match(row)
        if config_m is not None:
            assert index is None, 'matching [CONFIG] row without a following [TEST] row'
            # Raises KeyError for a window length that is not listed in len_windows.
            index = mapping[int(config_m[1])]
        loss_m = loss_ptn.match(row)
        if loss_m is not None and index is not None:
            per_window[index].append(float(loss_m[1]))
            index = None
    mses.append(per_window)
# Average over the losses collected for each window length.
mses = np.array(mses).mean(-1)
mses.shape # dataset, len_window

In [None]:
# Grouped bar chart: one group per dataset, one bar per decomposition window length.
xticks = np.arange(len(datasets))

plt.figure(figsize=(12, 6))
for j in range(len(len_windows)):
    lw = len_windows[j]
    # A window length of 0 gets the plain autocorrelation label.
    label = '+ AutoCorrelation' if lw == 0 else '+ AutoCorrelation + Decomposition: {}'.format(lw)
    # (j - 2) centres a group of five bars on its tick.
    plt.bar(xticks + (j - 2) * BAR_WIDTH, mses[:, j], width=BAR_WIDTH, label=label)

plt.title('LayerNorm vs Decomposition on Untrended Datasets (Univariate)')
plt.xlabel('Dataset')
plt.ylabel('Testing MSE')
plt.xticks(xticks, labels=datasets)
plt.legend(loc='upper left')
if SAVE:
    out_path = os.path.join(SAVEDIR, 'mse_untrended_decomp.png')
    plt.savefig(out_path)
plt.show()

### AutoCorrelation Block

#### With Trend

In [None]:
# Collect test MSEs of the autoformer runs with window length 25, split by attention type,
# on the datasets that contain a trend component.
datasets = ['x', 'sinx_x', 'sinx_sqrtx', 'sinx_x2_sym', 'sinx_x2_asym']
attns = ['dot', 'autocorrelation']
# Attention name -> position along the second axis of `mses`.
mapping = {attn: i for i, attn in enumerate(attns)}

# The attention name is captured from the config name, so the pattern depends only on
# the model sizes and is compiled once.
config_ptn = re.compile(
    r'.*\[CONFIG\]\t.*_mautoformer_attn([^_]*)_.*_dm{}_dff{}_.*_lw25_'.format(D_MODEL, D_FF)
)

mses = []
for dataset in datasets:
    # The context manager closes the log file as soon as it has been read.
    with open(os.path.join(CKPTDIR, dataset, 'exp.log')) as log_file:
        logs = log_file.readlines()
    per_attn = [[] for _ in attns]
    # Slot of the most recent matching [CONFIG] row that is still waiting for its [TEST] row.
    index = None
    for row in logs:
        config_m = config_ptn.match(row)
        if config_m is not None:
            assert index is None, 'matching [CONFIG] row without a following [TEST] row'
            # Raises KeyError for an attention name that is not listed in attns.
            index = mapping[config_m[1]]
        loss_m = loss_ptn.match(row)
        if loss_m is not None and index is not None:
            per_attn[index].append(float(loss_m[1]))
            index = None
    mses.append(per_attn)
# Average over the losses collected for each attention type.
mses = np.array(mses).mean(-1)
mses.shape # dataset, attn

In [None]:
# Two bars per dataset: column 0 and column 1 of `mses`, side by side around each tick.
xticks = np.arange(len(datasets))
half_width = BAR_WIDTH / 2

plt.figure(figsize=(12, 6))
plt.bar(xticks - half_width, mses[:, 0], width=BAR_WIDTH, label='+ Decomposition')
plt.bar(xticks + half_width, mses[:, 1], width=BAR_WIDTH, label='+ AutoCorrelation + Decomposition')

plt.xticks(xticks, labels=datasets)
plt.ylim(0, 0.0175)
plt.xlabel('Dataset')
plt.ylabel('Testing MSE')
plt.title('Attention vs AutoCorrelation on Trended Datasets (Univariate)')
plt.legend(loc='upper left')
if SAVE:
    out_path = os.path.join(SAVEDIR, 'mse_trended_attn.png')
    plt.savefig(out_path)
plt.show()

#### Without Trend

In [None]:
# Collect test MSEs of the autoformer runs with window length 25, split by attention type,
# on the datasets without a trend component.
datasets = ['sinx', 'xsinx', 'sinx_sin2x_sin4x', 'sinx_c']
attns = ['dot', 'autocorrelation']
# Attention name -> position along the second axis of `mses`.
mapping = {attn: i for i, attn in enumerate(attns)}

# The attention name is captured from the config name, so the pattern depends only on
# the model sizes and is compiled once.
config_ptn = re.compile(
    r'.*\[CONFIG\]\t.*_mautoformer_attn([^_]*)_.*_dm{}_dff{}_.*_lw25_'.format(D_MODEL, D_FF)
)

mses = []
for dataset in datasets:
    # The context manager closes the log file as soon as it has been read.
    with open(os.path.join(CKPTDIR, dataset, 'exp.log')) as log_file:
        logs = log_file.readlines()
    per_attn = [[] for _ in attns]
    # Slot of the most recent matching [CONFIG] row that is still waiting for its [TEST] row.
    index = None
    for row in logs:
        config_m = config_ptn.match(row)
        if config_m is not None:
            assert index is None, 'matching [CONFIG] row without a following [TEST] row'
            # Raises KeyError for an attention name that is not listed in attns.
            index = mapping[config_m[1]]
        loss_m = loss_ptn.match(row)
        if loss_m is not None and index is not None:
            per_attn[index].append(float(loss_m[1]))
            index = None
    mses.append(per_attn)
# Average over the losses collected for each attention type.
mses = np.array(mses).mean(-1)
mses.shape # dataset, attn

In [None]:
# Two bars per dataset: column 0 and column 1 of `mses`, side by side around each tick.
xticks = np.arange(len(datasets))
half_width = BAR_WIDTH / 2

plt.figure(figsize=(12, 6))
plt.bar(xticks - half_width, mses[:, 0], width=BAR_WIDTH, label='+ Decomposition')
plt.bar(xticks + half_width, mses[:, 1], width=BAR_WIDTH, label='+ AutoCorrelation + Decomposition')

plt.xticks(xticks, labels=datasets)
plt.xlabel('Dataset')
plt.ylabel('Testing MSE')
plt.title('Attention vs AutoCorrelation on Untrended Datasets (Univariate)')
plt.legend(loc='upper left')
if SAVE:
    out_path = os.path.join(SAVEDIR, 'mse_untrended_attn.png')
    plt.savefig(out_path)
plt.show()

## Real-World Datasets

In [None]:
# Root directory with one sub-directory per real-world dataset.
# Defaults to the original location; set LSTF_CKPT_ROOT to point at another checkpoint root.
CKPTDIR = os.environ.get('LSTF_CKPT_ROOT', '/usr2/home/yongyiw/ckpt/lstf')
# Number of prediction lengths evaluated per dataset (one x tick each).
N_LEN_PRED = 4
DATASETS = ['ETTm2', 'Electricity', 'Exchange', 'Traffic', 'Weather', 'ILI']

In [None]:
xticks = np.arange(N_LEN_PRED)

# Collect test MSEs for the 2x2 ablation at every prediction length of each real-world dataset.
mses = []
for dataset in DATASETS:
    # The context manager closes the log file as soon as it has been read.
    with open(os.path.join(CKPTDIR, dataset, 'autoformer', 'exp.log')) as log_file:
        logs = log_file.readlines()
    # Prediction length -> position along the last axis; ILI uses its own set of lengths.
    # Chosen once per dataset instead of being rebuilt for every matching row.
    if dataset != 'ILI':
        mapping = {96: 0, 192: 1, 336: 2, 720: 3}
    else:
        mapping = {24: 0, 36: 1, 48: 2, 60: 3}
    mses.append([])
    for attn in ['autocorrelation', 'dot']:
        mses[-1].append([])
        for lwin in [25, 0]:
            config_ptn = re.compile(
                r'.*\[CONFIG\]\t.*_lp(\d+)_.*_attn{}_.*_lw{}_'.format(attn, lwin)
            )
            per_len_pred = [[] for _ in range(N_LEN_PRED)]
            # Slot of the most recent matching [CONFIG] row still waiting for its [TEST] row.
            index = None
            for row in logs:
                config_m = config_ptn.match(row)
                if config_m is not None:
                    assert index is None, 'matching [CONFIG] row without a following [TEST] row'
                    # Raises KeyError for a prediction length missing from the mapping.
                    index = mapping[int(config_m[1])]
                loss_m = loss_ptn.match(row)
                if loss_m is not None and index is not None:
                    per_len_pred[index].append(float(loss_m[1]))
                    index = None
            mses[-1][-1].append(per_len_pred)
# Average over the losses collected for each (dataset, attention, window, prediction length).
mses = np.array(mses).mean(-1)
mses.shape # dataset, auto/attn, decomp/norm, len_pred

In [None]:
# Horizontal bar offset of each model variant, in units of BAR_WIDTH.
# Insertion order is also the drawing (and legend) order.
offset = {
    'Transformer': -1.5,
    '+ AutoCorrelation': -0.5,
    '+ Decomposition': 0.5,
    '+ AutoCorrelation + Decomposition': 1.5
}
# (attention index, decomposition index) into `mses[i]` for each model variant.
selector = {
    'Transformer': (1, 1),
    '+ AutoCorrelation': (0, 1),
    '+ Decomposition': (1, 0),
    '+ AutoCorrelation + Decomposition': (0, 0)
}

# One figure per dataset, with one group of bars per prediction length.
for i, dataset in enumerate(DATASETS):
    plt.figure(figsize=(12, 6))
    for label, shift in offset.items():
        attn_idx, decomp_idx = selector[label]
        plt.bar(xticks + shift * BAR_WIDTH, mses[i][attn_idx][decomp_idx], width=BAR_WIDTH, label=label)

    # Leave headroom above the tallest bar.
    plt.ylim(0, 1.25 * np.max(mses[i]))
    plt.xticks(xticks, labels=[96, 192, 336, 720] if dataset != 'ILI' else [24, 36, 48, 60])
    plt.title('{} Dataset (Multivariate)'.format(dataset))
    plt.xlabel('Prediction Length')
    plt.ylabel('Testing MSE')
    plt.legend(loc='upper left')
    if SAVE:
        plt.savefig(os.path.join(SAVEDIR, 'mse_{}.png'.format(dataset)))
    plt.show()