In [1]:
import sys
# Append the parent directory to the module search path
# (presumably so the package can be imported from a local checkout; TODO confirm).
sys.path.append('..')

In [2]:
import sentence_embedding_evaluation_german as seeg
import torch
from typing import List

## (1) Instantiate your Embedding model
First, you should load your pretrained embedding.

Here we will generate a random embedding for demonstration purposes.

In [3]:
# Generate a random embedding for demonstration purposes.
# A dedicated, seeded generator makes the weights identical on every
# "Restart & Run All" without touching the global torch RNG state.
emb_dim = 512
vocab_sz = 128
emb_seed = 42
emb_rng = torch.Generator().manual_seed(emb_seed)
emb = torch.randn((vocab_sz, emb_dim), generator=emb_rng, requires_grad=False)
# `from_pretrained` freezes the weights by default (freeze=True).
emb = torch.nn.Embedding.from_pretrained(emb)
assert not emb.weight.requires_grad

## (2) Specify the preprocessing
The `preprocesser` function converts a batch of sentences (strings) into embedding vectors, one vector per sentence.

Here, as a deliberately nonsensical demonstration, each character of an input string is converted into an ID for the Embedding layer, and the resulting character embeddings are averaged per sentence.

In [4]:
def preprocesser(batch: List[str], params: dict=None) -> torch.Tensor:
    """ Specify your embedding or pretrained encoder here

    Demo encoder: every character is mapped to the ID
    `ord(c) % emb.num_embeddings`, the IDs are looked up in the
    module-level `emb` layer, and the character vectors are averaged
    per sentence.

    Parameters:
    -----------
    batch : List[str]
        A list of sentences as strings
    params : dict
        The params dictionary (not used by this demo encoder)

    Returns:
    --------
    torch.Tensor
        A tensor of shape `(len(batch), emb.embedding_dim)` with one
        embedding vector per sentence, in input order. Empty sentences
        and non-string entries are encoded as an all-zero vector.
    """
    import warnings  # local import keeps this cell self-contained

    weight = emb.weight
    vocab_size, dim = emb.num_embeddings, emb.embedding_dim

    features = []
    for sent in batch:
        if not isinstance(sent, str):
            # Previously a bare `except` printed the item and then silently
            # reused the IDs of the preceding sentence (or hit a NameError).
            warnings.warn(
                f"Non-string input is encoded as an empty sentence: {sent!r}")
            sent = ""
        if not sent:
            # The mean over zero characters is undefined; emit a zero vector
            # so that the output stays aligned with the input batch.
            features.append(
                torch.zeros(dim, dtype=weight.dtype, device=weight.device))
            continue
        # Take the modulus from the embedding itself so that every ID is a
        # valid row index whatever the vocabulary size is.
        ids = torch.tensor(
            [ord(c) % vocab_size for c in sent],
            dtype=torch.long, device=weight.device)
        features.append(emb(ids).mean(dim=0))

    if not features:
        # `torch.stack` rejects an empty list; return an empty (0, dim) batch.
        return torch.zeros((0, dim), dtype=weight.dtype, device=weight.device)
    return torch.stack(features, dim=0)

## (3) Training settings

In [5]:
# Settings that are unpacked as keyword arguments into the evaluation call.
params = dict(
    datafolder='../datasets',
    bias=True,
    balanced=True,
    batch_size=128,
    num_epochs=10,  # Default: 500
    # early_stopping=True,
    # split_ratio=0.2,  # if early_stopping=True
    # patience=5,  # if early_stopping=True
)

## (4) Specify downstream tasks

In [6]:
# All
# downstream_tasks = [
#     'TOXIC', 'ENGAGE', 'FCLAIM', 'VMWE',
#     'OL19-A', 'OL19-B', 'OL19-C',
#     'OL18-A', 'OL18-B', 
#     'ABSD-1', 'ABSD-2', 'ABSD-3',
#     'MIO-S', 'MIO-O', 'MIO-I', 'MIO-D', 'MIO-F', 'MIO-P', 'MIO-A',
#     'SBCH-L', 'SBCH-S', 'ARCHI', 'LSDC'
# ]

# Group tasks
# downstream_tasks = [
#     'ABSD-2', 'MIO-S', 'SBCH-S',  # Sentiment analysis
#     'ENGAGE', 'MIO-P',  # engaging/personal
#     'FCLAIM', 'MIO-A',  # fact-claim (potential fake news), argumentative, reasoning
#     'TOXIC', 'OL19-A', 'OL19-B', 'OL19-C', 'MIO-O', 'MIO-I',  # toxic
# ]

# Current favorites (the task selection that is actually evaluated)
downstream_tasks = [
    'FCLAIM',
    'VMWE',
    'OL19-C',
    'ABSD-2',
    'MIO-P',
    'ARCHI',
    'LSDC',
]

## (5) Run experiments

In [7]:
%%time
results = seeg.evaluate(downstream_tasks, preprocesser, verbose=1, **params)

Dataset loaded: FCLAIM
epoch 1 | loss: 0.6967413677619054
epoch 2 | loss: 0.6878486275672913
epoch 3 | loss: 0.6872380467561575
epoch 4 | loss: 0.6854658172680781
epoch 5 | loss: 0.6833217831758353
epoch 6 | loss: 0.681429576415282
epoch 7 | loss: 0.6797747520300058
epoch 8 | loss: 0.6782860297423142
epoch 9 | loss: 0.6769298039949857
epoch 10 | loss: 0.6756867789305173
Dataset loaded: VMWE
epoch 1 | loss: 0.6948041491783582
epoch 2 | loss: 0.6866596215046369
epoch 3 | loss: 0.6880441628969632
epoch 4 | loss: 0.6878308264108804
epoch 5 | loss: 0.6872189847322611
epoch 6 | loss: 0.6865111199709085
epoch 7 | loss: 0.6857815465101829
epoch 8 | loss: 0.6850517953817661
epoch 9 | loss: 0.6843297607623614
epoch 10 | loss: 0.6836191461636469
Dataset loaded: OL19-C
epoch 1 | loss: 0.7060581818223
epoch 2 | loss: 0.6990385875105858
epoch 3 | loss: 0.6970023177564144
epoch 4 | loss: 0.69573974609375
epoch 5 | loss: 0.694473635405302
epoch 6 | loss: 0.6931687854230404
epoch 7 | loss: 0.6918717995

In [8]:
import json

# Serialize the evaluation results as pretty-printed JSON text.
dat = json.dumps(obj=results, indent=2)
# print(dat)

## (6) Display results

In [9]:
# One row per task: epochs plus the "num" entries of the train and test results.
print("Task | Epochs | N train | N test")
for entry in results:
    task = entry['task']
    epochs = entry['epochs']
    n_train = entry['train']['num']
    n_test = entry['test']['num']
    print(f"{task:>7s}: {epochs:5d} {n_train:6d} {n_test:6d}")

Task | Epochs | N train | N test
 FCLAIM:    10   3244    944
   VMWE:    10   6652   1447
 OL19-C:    10   1921    930
 ABSD-2:    10  19432   2555
  MIO-P:    10   4668   4668
  ARCHI:    10  18809   4743
   LSDC:    10  74140   8602


In [10]:
# Metric to tabulate; one of 'f1', 'f1-balanced', 'acc', 'acc-balanced'
metric = 'acc'
print('  Task | train | test')
for entry in results:
    task = entry['task']
    train_score = entry['train'][metric]
    test_score = entry['test'][metric]
    print(f"{task:>7s}: {train_score:6.3f} {test_score:6.3f}")

  Task | train | test
 FCLAIM:  0.602  0.575
   VMWE:  0.827  0.823
 OL19-C:  0.837  0.826
 ABSD-2:  0.501  0.493
  MIO-P:  0.573  0.584
  ARCHI:  0.397  0.394
   LSDC:  0.550  0.444


In [11]:
# Metric to tabulate; one of 'f1', 'f1-balanced', 'acc', 'acc-balanced'
metric = 'f1-balanced'
print('  Task | train | test')
for entry in results:
    task = entry['task']
    train_score = entry['train'][metric]
    test_score = entry['test'][metric]
    print(f"{task:>7s}: {train_score:6.3f} {test_score:6.3f}")

  Task | train | test
 FCLAIM:  0.583  0.553
   VMWE:  0.453  0.451
 OL19-C:  0.468  0.470
 ABSD-2:  0.394  0.360
  MIO-P:  0.537  0.546
  ARCHI:  0.395  0.389
   LSDC:  0.292  0.274


In [12]:
# Class label distributions (inference): the 'distr-test' entry
# followed by the 'distr-pred' entry of each task's test results.
for entry in results:
    task = entry['task']
    test_stats = entry['test']
    print(task, test_stats['num'])
    for key in ('distr-test', 'distr-pred'):
        print(test_stats[key])

FCLAIM 944
{'0': '630', '1': '314'}
{'0': '523', '1': '421'}
VMWE 1447
{'1': '256', '0': '1191'}
{'0': '1447'}
OL19-C 930
{'0': '796', '1': '134'}
{'0': '896', '1': '34'}
ABSD-2 2555
{'1': '1670', '0': '780', '2': '105'}
{'0': '604', '1': '1386', '2': '565'}
MIO-P 4668
{'1': '812', '0': '3856'}
{'1': '2498', '0': '2170'}
ARCHI 4743
{'2': '1177', '3': '1176', '1': '1199', '0': '1191'}
{'2': '950', '0': '800', '3': '1541', '1': '1452'}
LSDC 8602
{'13': '280', '6': '346', '12': '940', '3': '925', '2': '944', '1': '934', '11': '931', '0': '453', '10': '511', '5': '924', '4': '65', '8': '923', '9': '83', '7': '343'}
{'5': '1270', '1': '974', '3': '1226', '11': '1269', '12': '888', '8': '1254', '0': '1231', '2': '490'}


In [13]:
# Class label distributions (training): the 'distr-train' entry
# followed by the 'distr-pred' entry of each task's train results.
for entry in results:
    task = entry['task']
    train_stats = entry['train']
    print(task, train_stats['num'])
    for key in ('distr-train', 'distr-pred'):
        print(train_stats[key])

FCLAIM 3244
{'0': '2141', '1': '1103'}
{'1': '1445', '0': '1799'}
VMWE 6652
{'1': '1145', '0': '5507'}
{'0': '6646', '1': '6'}
OL19-C 1921
{'0': '1664', '1': '257'}
{'0': '1857', '1': '64'}
ABSD-2 19432
{'2': '1179', '1': '13208', '0': '5045'}
{'2': '4867', '0': '4707', '1': '9858'}
MIO-P 4668
{'0': '3855', '1': '813'}
{'1': '2552', '0': '2116'}
ARCHI 18809
{'1': '4797', '3': '4407', '2': '4802', '0': '4803'}
{'1': '7240', '3': '4964', '0': '2748', '2': '3857'}
LSDC 74140
{'12': '15019', '8': '7829', '5': '13506', '1': '5294', '11': '13227', '3': '11002', '2': '5704', '13': '346', '10': '749', '7': '382', '9': '143', '0': '469', '6': '377', '4': '93'}
{'2': '4447', '8': '10813', '11': '14584', '3': '11093', '5': '13664', '12': '10009', '0': '4372', '1': '5052', '7': '106'}
