In [1]:
import os
import pickle
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sklearn.metrics

# Seaborn "talk" plotting context; applies to every figure created afterwards.
sns.set_context("talk")

# Bottleneck sweeps as (name, values) pairs, iterated by the Figure 5 cell.
# The name is interpolated into checkpoint file names and the x-axis label;
# the values become the x tick labels. For the first value of each sweep that
# cell loads `test_beta=0.0.pickle` instead of a sweep-specific file.
# NOTE(review): presumably hidden size, dropout rate and a beta weight -- confirm
# against the training code.
size_values = [150, 125, 100, 75, 50, 25, 10, 5]
dropout_values = [0, 0.1, 0.25, 0.5, 0.65, 0.75, 0.85, 0.9]
beta_values = [0, 2.5e-05, 0.000625, 0.00125, 0.0025, 0.00625, 0.0125, 0.025]

setups = [
    ("size", size_values),
    ("dropout", dropout_values),
    ("beta", beta_values),
]

## 1. Visualise main task performance, Figure 5 in Section 5

In [4]:
def load(filename):
    """Score one pickled test run of the main sentiment task.

    The pickle must hold a 5-item sequence
    ``(sentences, labels, test_pred, _, hidden)``; only ``labels`` and
    ``test_pred`` are used. Every entry of ``test_pred`` is reduced to a class
    index with ``np.argmax`` (presumably per-class scores -- confirm against
    the code that writes these files).

    NOTE(review): a later cell defines another ``load`` that expects 3-item
    pickles, so which definition is bound depends on cell execution order.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        ``(accuracy, macro_f1)``, each rounded to two decimals.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    # NOTE(review): unpickling can execute arbitrary code; only open trusted
    # checkpoint files.
    # The context manager closes the handle; a bare open() leaked one per call.
    with open(filename, "rb") as handle:
        _sentences, labels, test_pred, _, _hidden = pickle.load(handle)
    predictions = [np.argmax(scores) for scores in test_pred]
    accuracy = sklearn.metrics.accuracy_score(labels, predictions)
    f1 = sklearn.metrics.f1_score(labels, predictions, average="macro")
    return round(accuracy, 2), round(f1, 2)

# Figure 5: accuracy (solid) and macro-F1 (dashed) per bottleneck setting, for
# the regular ("BCM-PP") and taught ("BCM-TT") runs, with the sentiment-only
# baseline drawn in grey. Relies on `setups` and `load` defined above.
blue = sns.color_palette("crest_r", 3)[0]
purple = sns.color_palette("flare_r", 3)[0]

checkpoint_root = "../../tree_lstms/checkpoints/sentiment"
main_task_seeds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# savefig does not create missing directories.
os.makedirs("figures", exist_ok=True)

# The baseline depends only on the seed, so read each file once instead of
# once per (setup, weight, seed) combination.
baseline_scores = {
    seed: load(f"{checkpoint_root}/baseline_seed={seed}/test_beta=0.0.pickle")
    for seed in main_task_seeds
}

for setup, weights in setups:
    x, y_acc, y_f1, y_bl_acc, y_bl_f1, y2_acc, y2_f1 = [], [], [], [], [], [], []
    for i, weight in enumerate(weights):
        for seed in main_task_seeds:
            prefix = f"{checkpoint_root}/treelstm_bottleneck_seed={seed}/setup=regular"
            if i == 0:
                # First sweep value: both curves use the same beta=0.0 run, so
                # the taught scores are copied explicitly rather than left over
                # from the previous assignment.
                accuracy, f1 = load(f"{prefix}/test_beta=0.0.pickle")
                taught_accuracy, taught_f1 = accuracy, f1
            else:
                accuracy, f1 = load(f"{prefix}/test_{setup}={weight}.pickle")
                taught_accuracy, taught_f1 = load(
                    f"{prefix}/test_taught_{setup}={weight}.pickle")
            baseline_accuracy, baseline_f1 = baseline_scores[seed]

            x.append(i)
            y_acc.append(accuracy)
            y_f1.append(f1)
            y2_acc.append(taught_accuracy)
            y2_f1.append(taught_f1)
            y_bl_acc.append(baseline_accuracy)
            y_bl_f1.append(baseline_f1)

    fig, ax = plt.subplots(figsize=(3.5, 3.5))
    sns.lineplot(x=x, y=y_acc, color=blue, linewidth=4, label="BCM-PP", ax=ax)
    sns.lineplot(x=x, y=y2_acc, color=purple, label="BCM-TT", ax=ax)
    sns.lineplot(x=x, y=y_f1, color=blue, ax=ax, linestyle="--", linewidth=3)
    sns.lineplot(x=x, y=y2_f1, color=purple, ax=ax, linestyle="--")
    sns.lineplot(x=x, y=y_bl_acc, color="grey", ax=ax, err_style=None, zorder=-1)
    sns.lineplot(x=x, y=y_bl_f1, color="grey", ax=ax, linestyle="--", err_style=None, zorder=-1)

    # One tick per sweep value; leading "0." is shortened to "." in the labels.
    ax.set_xticks(list(range(len(weights))))
    ax.set_xticklabels([str(w).replace("0.", ".") for w in weights], rotation=90)
    # Only the "beta" panel keeps its y axis; only the "size" panel keeps the legend.
    if setup != "beta":
        ax.set_yticks([])
        ax.set_ylabel("")
    else:
        ax.set_ylabel("performance")
    if setup != "size":
        ax.legend([], [], frameon=False)
    else:
        ax.legend(fontsize=14, frameon=False)
    ax.set_xlabel(setup.replace("beta", r"$\beta$"))
    if setup == "dropout":
        ax.text(0.06, 0.37, "sentiment-only baseline", fontsize=11)
    ax.set_ylim(0.25, 0.56)
    ax.set_xlim(0, len(weights) - 1)
    fig.savefig(f"figures/performance_{setup}.pdf", bbox_inches="tight")
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

FileNotFoundError: [Errno 2] No such file or directory: '../../tree_lstms/checkpoints/sentiment/treelstm_bottleneck_seed=1/setup=regular/test_beta=0.0.pickle'

<Figure size 500x500 with 0 Axes>

## 2. Visualise performance on hard and small splits, Figure 9 in Section 5 and Appendix B

In [3]:


def clean(s):
    """Strip bracket markup from a string and normalise its whitespace.

    Removes every "(" that is followed -- directly, or after exactly one
    space -- by a digit 0-5 (together with that digit), removes every ")",
    and collapses runs of whitespace to single spaces. Replacements are
    applied one after another, so text that only becomes adjacent after an
    earlier removal is still caught by a later one.

    NOTE(review): presumably meant for sentiment-labelled bracketed parse
    trees -- confirm with the callers.

    Args:
        s: Input string.

    Returns:
        The cleaned string with no leading or trailing whitespace.
    """
    for opener in ("(", "( "):
        # Highest label first, matching the original replacement order.
        for digit in "543210":
            s = s.replace(opener + digit, "")
        s = s.replace(")", "")
    return " ".join(s.split())


def load(filename):
    """Score one pickled test run from the hard-split / subset experiments.

    The pickle must hold a 3-item sequence ``(sentences, labels, test_pred)``;
    ``test_pred`` is passed to the metrics unchanged.

    NOTE(review): this redefines the ``load`` from the Figure 5 cell, which
    expects 5-item pickles and returns rounded scores; which definition is
    bound depends on cell execution order.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        ``(test_pred, accuracy, macro_f1)`` with unrounded scores.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    # NOTE(review): unpickling can execute arbitrary code; only open trusted
    # checkpoint files.
    # The context manager closes the handle; a bare open() leaked one per call.
    with open(filename, "rb") as handle:
        _sentences, labels, test_pred = pickle.load(handle)
    accuracy = sklearn.metrics.accuracy_score(labels, test_pred)
    f1 = sklearn.metrics.f1_score(labels, test_pred, average="macro")
    return test_pred, accuracy, f1


# Print one LaTeX table row per model: for each hard-split setup, the accuracy
# and macro-F1 averaged over five seeds, with the leading "0" of each number
# dropped. Uses the 3-item `load` defined in this cell.
for bottleneck in ["size"]:
    for metric in ["bcm-pp",]:
        for model in ["Roberta", "LSTM"]:
            cells = []
            for setup in ["compositional", "non-compositional", "random"]:
                accs, f1s = [], []
                for seed in range(1, 6):
                    path = (
                        "../../sentiment_training/checkpoints/"
                        f"sentiment_{model}_{setup}_seed={seed}_hard_splits/"
                        f"hard_split_metric={metric}_bottleneck={bottleneck}"
                    )
                    _, acc, f1 = load(f"{path}/test.pickle")
                    accs.append(acc)
                    f1s.append(f1)
                acc, f1 = np.mean(accs), np.mean(f1s)
                # The "0." -> "." shortening is applied to the numbers only,
                # never to the model name.
                cells.append(f"& {acc:.3f} & {f1:.3f}".replace('0.', '.'))
            str_ = model + "".join(cells) + "\\\\"
            print(str_)
    

Roberta& .546 & .535& .516 & .487& .565 & .549\\
LSTM& .505 & .485& .394 & .310& .478 & .447\\


In [4]:

# Figure 9 / Appendix B: accuracy (solid) and macro-F1 (dashed) against the
# training ratio, for models trained on the "compositional" versus the
# "non-compositional" side, one figure per model x metric x bottleneck.
# Relies on the 3-item `load` defined in the previous cell.
ratios = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
seeds = [1, 2, 3, 4, 5]
subset_root = "../../sentiment_training/checkpoints"

# The palette is the same for every figure, so build it once.
colors = sns.color_palette("crest", 3)
blue = colors[-1]
green = colors[0]

# savefig does not create missing directories.
os.makedirs("figures", exist_ok=True)

for model in ["Roberta", "LSTM"]:
    for metric in ["bcm-pp", "bcm-tt"]:
        for bottleneck in ["size", "dropout", "beta"]:
            x, y_comp_acc, y_comp_f1, y_noncomp_acc, y_noncomp_f1 = [], [], [], [], []
            results_file = f"subsets_metric={metric}_bottleneck={bottleneck}/test.pickle"
            for seed in seeds:
                for ratio in ratios:
                    # Only the scores are needed; the predictions returned
                    # first by `load` are dropped instead of being bound.
                    model2 = f"{subset_root}/sentiment_{model}_side=compositional_ratio={ratio}_seed={seed}_subsets"
                    acc, f1 = load(f"{model2}/{results_file}")[1:]
                    y_comp_acc.append(acc)
                    y_comp_f1.append(f1)
                    model2 = f"{subset_root}/sentiment_{model}_side=non-compositional_ratio={ratio}_seed={seed}_subsets"
                    acc, f1 = load(f"{model2}/{results_file}")[1:]
                    y_noncomp_acc.append(acc)
                    y_noncomp_f1.append(f1)
                    x.append(ratio)

            fig, ax = plt.subplots(figsize=(5, 4))
            sns.lineplot(x=x, y=y_comp_acc, color=blue, label="comp.", ax=ax)
            sns.lineplot(x=x, y=y_noncomp_acc, color=green, label="non-comp.", ax=ax)
            sns.lineplot(x=x, y=y_comp_f1, color=blue, linestyle='--', ax=ax)
            sns.lineplot(x=x, y=y_noncomp_f1, color=green, linestyle='--', ax=ax)

            ax.set_xlabel(r"training ratio")
            ax.set_xticks(ratios)
            ax.set_xticklabels(ratios, rotation=90)
            ax.set_ylabel("performance")

            # The legend is shown on the Roberta figures only.
            if model == "LSTM":
                ax.legend([], [], frameon=False)
            else:
                ax.legend(frameon=False)
            ax.set_xlim(0.01, 0.5)
            ax.set_ylim(0.15, 0.60)
            fig.savefig(f"figures/{model}_metric={metric}_bottleneck={bottleneck}.pdf", bbox_inches="tight")
            plt.show()
            # Twelve figures are produced; release each one once it is saved.
            plt.close(fig)

FileNotFoundError: [Errno 2] No such file or directory: '../../sentiment_training/checkpoints/sentiment_Roberta_side=non-compositional_ratio=0.1_seed=1_subsets/subsets_metric=bcm-pp_bottleneck=size/test.pickle'