In [1]:
import sys
sys.path.append('..')

In [2]:
import sentence_embedding_evaluation_german as seeg
import torch
from typing import List

  from .autonotebook import tqdm as notebook_tqdm


## (1) Instantiate your Embedding model
First, you should load your pretrained embedding.

Here we will generate a random embedding for demonstration purposes.

In [3]:
# Generate a random embedding table for demonstration purposes.
emb_dim = 512   # length of each embedding vector
vocab_sz = 128  # number of rows (IDs) in the embedding table
emb_seed = 42   # fixed seed so a fresh kernel rebuilds the same table
# A dedicated generator makes the draw reproducible without reseeding
# torch's default (global) generator.
emb_rng = torch.Generator().manual_seed(emb_seed)
emb = torch.randn((vocab_sz, emb_dim), generator=emb_rng)
# from_pretrained freezes the weights by default (freeze=True).
emb = torch.nn.Embedding.from_pretrained(emb)
assert not emb.weight.requires_grad

## (2) Specify the preprocessing
The `preprocesser` function converts a batch of sentences (strings) into numeric embedding vectors, one vector per sentence.

Here we will convert the input strings into IDs for the embedding layer with a deliberately nonsensical approach: each character is mapped to its code point modulo 128.

In [4]:
def preprocesser(batch: List[str], params: dict = None) -> torch.Tensor:
    """Embed a batch of sentences with the module-level embedding ``emb``.

    Each character is mapped to an ID via ``ord(c) % emb.num_embeddings``
    (a deliberately nonsensical tokenization for demonstration purposes),
    the IDs are looked up in ``emb``, and the character vectors are
    mean-pooled into one vector per sentence.

    Parameters:
    -----------
    batch : List[str]
        A list of sentences as strings.
    params : dict
        The params dictionary. Not used by this demo implementation.

    Returns:
    --------
    torch.Tensor
        A tensor of shape ``(len(batch), emb.embedding_dim)`` holding one
        embedding vector per sentence. An empty sentence yields an all-zero
        vector; an empty batch yields a tensor with zero rows.

    Raises:
    -------
    TypeError
        If an element of ``batch`` is not a string.
    """
    weight = emb.weight
    if len(batch) == 0:
        # torch.stack raises on an empty list, so build the empty result directly.
        return weight.new_zeros((0, emb.embedding_dim))

    features = []
    for sent in batch:
        if not isinstance(sent, str):
            # Fail loudly instead of printing and reusing the previous
            # sentence's IDs (or hitting a NameError on the first item).
            raise TypeError(
                f"expected a sentence of type str, got {type(sent).__name__}: {sent!r}")
        if len(sent) == 0:
            # Nothing to pool: the mean over zero rows would be NaN.
            features.append(weight.new_zeros(emb.embedding_dim))
            continue
        ids = torch.tensor(
            [ord(c) % emb.num_embeddings for c in sent],
            dtype=torch.long, device=weight.device)
        features.append(emb(ids).mean(dim=0))
    return torch.stack(features, dim=0)

## (3) Training settings

In [5]:
# Settings for the evaluation run; unpacked as keyword arguments later on.
params = dict(
    datafolder='../datasets',
    batch_size=64,
    num_epochs=20,
    # Optional early-stopping settings (currently disabled):
    # early_stopping=True,
    # split_ratio=0.2,  # if early_stopping=True
    # patience=5,  # if early_stopping=True
)

## (4) Specify downstream tasks

In [6]:
# Option A -- all tasks:
# downstream_tasks = [
#     'TOXIC', 'ENGAGE', 'FCLAIM', 'VMWE',
#     'OL19-A', 'OL19-B', 'OL19-C',
#     'OL18-A', 'OL18-B',
#     'ABSD-1', 'ABSD-2', #'ABSD-3',
#     'MIO-S', 'MIO-O', 'MIO-I', 'MIO-D', 'MIO-F', 'MIO-P', 'MIO-A',
#     'SBCH-L', 'SBCH-S'
# ]

# Option B -- tasks grouped by theme:
# downstream_tasks = [
#     'ABSD-2', 'MIO-S', 'SBCH-S',  # Sentiment analysis
#     'ENGAGE', 'MIO-P',  # engaging/personal
#     'FCLAIM', 'MIO-A',  # fact-claim (potential fake news), argumentative, reasoning
#     'TOXIC', 'OL19-A', 'OL19-B', 'OL19-C', 'MIO-O', 'MIO-I',  # toxic
# ]

# Option C (active) -- current favorites:
downstream_tasks = [
    'FCLAIM',
    'VMWE',
    'OL19-C',
    'ABSD-2',
    'MIO-P',
    'SBCH-L',
]

## (5) Run experiments

In [7]:
%%time
# Run seeg.evaluate with the selected tasks, the preprocessing function,
# and the training settings unpacked as keyword arguments.
results = seeg.evaluate(
    downstream_tasks,
    preprocesser,
    **params
)

CPU times: user 29.7 s, sys: 3.26 s, total: 33 s
Wall time: 30.7 s


## (6) Display results

In [8]:
# Per result: task name, 'epochs', and the 'num' entries of the train and test records.
[
    (r['task'], r['epochs'], r['train']['num'], r['test']['num'])
    for r in results
]

[('FCLAIM', 20, 3244, 944),
 ('VMWE', 20, 6651, 1446),
 ('OL19-C', 20, 1920, 929),
 ('ABSD-2', 20, 19431, 2554),
 ('MIO-P', 20, 4668, 4668),
 ('SBCH-L', 20, 748, 748)]

In [9]:
# Show the 'f1' entry of each task's 'train' record as (task, score) pairs.
metric, mode = 'f1', 'train'

[
    (r['task'], r[mode][metric])
    for r in results
]

[('FCLAIM', 0.6615289765721332),
 ('VMWE', 0.8279957901067508),
 ('OL19-C', 0.8661458333333333),
 ('ABSD-2', 0.6797385620915033),
 ('MIO-P', 0.8258354755784063),
 ('SBCH-L', 0.6951871657754011)]

In [10]:
# Show the 'f1' entry of each task's 'test' record as (task, score) pairs.
metric, mode = 'f1', 'test'

[
    (r['task'], r[mode][metric])
    for r in results
]

[('FCLAIM', 0.6673728813559322),
 ('VMWE', 0.8236514522821576),
 ('OL19-C', 0.8557588805166847),
 ('ABSD-2', 0.6534847298355521),
 ('MIO-P', 0.8260497000856898),
 ('SBCH-L', 0.6684491978609626)]

In [11]:
# Show the 'f1-balanced' entry of each task's 'test' record as (task, score) pairs.
metric, mode = 'f1-balanced', 'test'

[
    (r['task'], r[mode][metric])
    for r in results
]

[('FCLAIM', 0.4092474489795918),
 ('VMWE', 0.45164960182025027),
 ('OL19-C', 0.4611368909512761),
 ('ABSD-2', 0.26347778040887204),
 ('MIO-P', 0.4523697794462694),
 ('SBCH-L', 0.6584474885844749)]

In [12]:
# Class label distribution (training): per task, the example count next to the
# 'distr-train' and 'distr-pred' entries of the 'train' record.
[
    (
        r['task'],
        r['train']['num'],
        r['train']['distr-train'],
        r['train']['distr-pred'],
    )
    for r in results
]

[('FCLAIM', 3244, {0: 2141, 1: 1103}, {0: 3219, 1: 25}),
 ('VMWE', 6651, {1: 1144, 0: 5507}, {0: 6651}),
 ('OL19-C', 1920, {0: 1663, 1: 257}, {0: 1920}),
 ('ABSD-2', 19431, {1: 13208, 0: 5045, 2: 1178}, {1: 19431}),
 ('MIO-P', 4668, {0: 3855, 1: 813}, {0: 4668}),
 ('SBCH-L', 748, {1: 403, 0: 345}, {1: 479, 0: 269})]

In [13]:
# Class label distribution (inference): per task, the example count next to the
# 'distr-test' and 'distr-pred' entries of the 'test' record.
[
    (
        r['task'],
        r['test']['num'],
        r['test']['distr-test'],
        r['test']['distr-pred'],
    )
    for r in results
]

[('FCLAIM', 944, {0: 630, 1: 314}, {0: 938, 1: 6}),
 ('VMWE', 1446, {1: 255, 0: 1191}, {0: 1446}),
 ('OL19-C', 929, {0: 795, 1: 134}, {0: 929}),
 ('ABSD-2', 2554, {1: 1669, 0: 780, 2: 105}, {1: 2554}),
 ('MIO-P', 4668, {1: 812, 0: 3856}, {0: 4668}),
 ('SBCH-L', 748, {0: 346, 1: 402}, {1: 474, 0: 274})]