In [1]:
import sys
sys.path.append('..')

In [14]:
import sentence_embedding_evaluation_german as seeg
import torch
from typing import List

## (1) Instantiate your Embedding model
First, you should load your pretrained embedding.

Here we will generate a random embedding for demonstration purposes.

In [3]:
# Generate a random embedding table for demonstration purposes.
# A dedicated, seeded generator makes the weights identical on every
# "Restart & Run All" without touching torch's global RNG state.
emb_dim = 512
vocab_sz = 128
emb_seed = 42
emb_weights = torch.randn(
    (vocab_sz, emb_dim),
    generator=torch.Generator().manual_seed(emb_seed),
    requires_grad=False,
)
# `from_pretrained` wraps the matrix in an Embedding layer (frozen by default)
emb = torch.nn.Embedding.from_pretrained(emb_weights)
assert not emb.weight.requires_grad

## (2) Specify the preprocessing
The `preprocesser` function converts a batch of sentences (strings) into numeric embedding vectors.

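Here we use a deliberately nonsensical approach: each character's code point (modulo the vocabulary size) is used as an ID for the Embedding layer, and the resulting vectors are averaged per sentence.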

In [4]:
def preprocesser(batch: List[str], params: dict=None) -> torch.Tensor:
    """ Specify your embedding or pretrained encoder here

    Demo implementation: every character of a sentence is mapped to an ID
    (its Unicode code point modulo the number of rows of the module-level
    embedding layer `emb`), the IDs are looked up in `emb`, and the
    character vectors are averaged into a single sentence vector.

    Inputs that are not a non-empty string (e.g. a missing value such as
    `nan`) cannot be encoded; they yield an all-zero vector and a warning
    instead of silently reusing the IDs of the previous sentence.

    Parameters:
    -----------
    batch : List[str]
        A list of sentences as strings
    params : dict
        The params dictionary (not used by this demo implementation)

    Returns:
    --------
    torch.Tensor
        One embedding vector per input sentence, stacked along dim 0,
        i.e. a tensor of shape (len(batch), emb.embedding_dim)
    """
    import warnings

    features = []
    for sent in batch:
        if not isinstance(sent, str) or len(sent) == 0:
            # Nothing to look up: fall back to a zero vector so the output
            # stays aligned with the input batch.
            warnings.warn(
                f"Cannot encode {sent!r}; using a zero vector instead.")
            features.append(emb.weight.new_zeros(emb.embedding_dim))
            continue
        # Fold code points into the valid ID range of the embedding table
        ids = torch.tensor(
            [ord(c) % emb.num_embeddings for c in sent],
            dtype=torch.long, device=emb.weight.device)
        features.append(emb(ids).mean(dim=0))
    if not features:
        # torch.stack rejects an empty list; return an empty (0, dim) batch
        return emb.weight.new_zeros((0, emb.embedding_dim))
    return torch.stack(features, dim=0)

## (3) Training settings

In [5]:
# Settings that are later unpacked as keyword arguments for `seeg.evaluate`
params = dict(
    datafolder='../datasets',
    batch_size=64,
    num_epochs=20,
    # Optional settings, currently disabled:
    # early_stopping=True,
    # split_ratio=0.2,  # if early_stopping=True
    # patience=5,  # if early_stopping=True
)

## (4) Specify downstream tasks

In [6]:
# Alternative selections, kept for reference (uncomment one to use it).
#
# Every task:
# downstream_tasks = [
#     'TOXIC', 'ENGAGE', 'FCLAIM', 'VMWE',
#     'OL19-A', 'OL19-B', 'OL19-C',
#     'OL18-A', 'OL18-B',
#     'ABSD-1', 'ABSD-2', 'ABSD-3',
#     'MIO-S', 'MIO-O', 'MIO-I', 'MIO-D', 'MIO-F', 'MIO-P', 'MIO-A',
#     'SBCH-L', 'SBCH-S', 'ARCHI', 'LSDC'
# ]
#
# Tasks grouped by theme:
# downstream_tasks = [
#     'ABSD-2', 'MIO-S', 'SBCH-S',  # Sentiment analysis
#     'ENGAGE', 'MIO-P',  # engaging/personal
#     'FCLAIM', 'MIO-A',  # fact-claim (potential fake news), argumentative, reasoning
#     'TOXIC', 'OL19-A', 'OL19-B', 'OL19-C', 'MIO-O', 'MIO-I',  # toxic
# ]

# Active selection: current favorites
downstream_tasks = [
    'FCLAIM',
    'VMWE',
    'OL19-C',
    'ABSD-2',
    'MIO-P',
    'ARCHI',
    'LSDC',
]

## (5) Run experiments

In [7]:
%%time
# Run the selected tasks with the preprocessing function defined above;
# the settings dict is unpacked into keyword arguments.
results = seeg.evaluate(
    downstream_tasks,
    preprocesser,
    **params,
)

nan
CPU times: user 1min 34s, sys: 9.25 s, total: 1min 43s
Wall time: 1min 49s


## (6) Display results

In [8]:
# Per task: name, 'epochs' entry, and the 'num' entries of the train/test results
[
    (res['task'], res['epochs'], res['train']['num'], res['test']['num'])
    for res in results
]

[('FCLAIM', 20, 3244, 944),
 ('VMWE', 20, 6652, 1447),
 ('OL19-C', 20, 1921, 930),
 ('ABSD-2', 20, 19432, 2555),
 ('MIO-P', 20, 4668, 4668),
 ('ARCHI', 20, 18809, 4743),
 ('LSDC', 20, 74140, 8602)]

In [9]:
# 'f1' metric of the 'train' results, as (task, value) pairs
mode, metric = 'train', 'f1'

[
    (res['task'], res[mode][metric])
    for res in results
]

[('FCLAIM', 0.6670776818742293),
 ('VMWE', 0.8278713168971737),
 ('OL19-C', 0.8662155127537741),
 ('ABSD-2', 0.6797035817208728),
 ('MIO-P', 0.8258354755784063),
 ('ARCHI', 0.40501887394332503),
 ('LSDC', 0.5186673860264365)]

In [10]:
# 'f1' metric of the 'test' results, as (task, value) pairs
mode, metric = 'test', 'f1'

[
    (res['task'], res[mode][metric])
    for res in results
]

[('FCLAIM', 0.6705508474576272),
 ('VMWE', 0.8230822391154112),
 ('OL19-C', 0.8559139784946237),
 ('ABSD-2', 0.6536203522504892),
 ('MIO-P', 0.8260497000856898),
 ('ARCHI', 0.39869281045751637),
 ('LSDC', 0.3171355498721228)]

In [11]:
# 'f1-balanced' metric of the 'test' results, as (task, value) pairs
mode, metric = 'test', 'f1-balanced'

[
    (res['task'], res[mode][metric])
    for res in results
]

[('FCLAIM', 0.4192148290289716),
 ('VMWE', 0.4514783927217589),
 ('OL19-C', 0.4611819235225956),
 ('ABSD-2', 0.2635108481262327),
 ('MIO-P', 0.4523697794462694),
 ('ARCHI', 0.3952927574143255),
 ('LSDC', 0.1336177186212118)]

In [12]:
# Class label distribution (training): for each task print its name and
# 'num' entry, then the 'distr-train' and 'distr-pred' entries on their own lines
for res in results:
    print(res['task'], res['train']['num'])
    print(res['train']['distr-train'], res['train']['distr-pred'], sep='\n')

FCLAIM 3244
{0: 2141, 1: 1103}
{0: 3211, 1: 33}
VMWE 6652
{1: 1145, 0: 5507}
{0: 6652}
OL19-C 1921
{0: 1664, 1: 257}
{0: 1921}
ABSD-2 19432
{2: 1179, 1: 13208, 0: 5045}
{1: 19432}
MIO-P 4668
{0: 3855, 1: 813}
{0: 4668}
ARCHI 18809
{1: 4797, 3: 4407, 2: 4802, 0: 4803}
{1: 7579, 2: 3823, 0: 3163, 3: 4244}
LSDC 74140
{12: 15019, 8: 7829, 5: 13506, 1: 5294, 11: 13227, 3: 11002, 2: 5704, 13: 346, 10: 749, 7: 382, 9: 143, 0: 469, 6: 377, 4: 93}
{5: 16566, 12: 22331, 11: 18113, 3: 16026, 2: 1076, 7: 27, 8: 1}


In [13]:
# Class label distributions (inference): for each task print its name and
# 'num' entry, then the 'distr-test' and 'distr-pred' entries on their own lines
for res in results:
    print(res['task'], res['test']['num'])
    print(res['test']['distr-test'], res['test']['distr-pred'], sep='\n')

FCLAIM 944
{0: 630, 1: 314}
{0: 935, 1: 9}
VMWE 1447
{1: 256, 0: 1191}
{0: 1447}
OL19-C 930
{0: 796, 1: 134}
{0: 930}
ABSD-2 2555
{1: 1670, 0: 780, 2: 105}
{1: 2555}
MIO-P 4668
{1: 812, 0: 3856}
{0: 4668}
ARCHI 4743
{2: 1177, 3: 1176, 1: 1199, 0: 1191}
{2: 868, 0: 907, 3: 1270, 1: 1698}
LSDC 8602
{13: 280, 6: 346, 12: 940, 3: 925, 2: 944, 1: 934, 11: 931, 0: 453, 10: 511, 5: 924, 4: 65, 8: 923, 9: 83, 7: 343}
{5: 1606, 3: 2279, 12: 2569, 11: 2064, 2: 84}
