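To run this notebook, the experiment results must be available under `./results/`, with one sub-directory per model: `./results/baseline`, `./results/dataset-embeddings` and `./results/special-token`. Each model directory is expected to contain one `rs<seed>/<domain>-test-pred-results.json` file per seed and domain.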

In [16]:
from collections import defaultdict
from functools import partial
from itertools import chain

import numpy as np

!pip install --upgrade pandas jinja2 --target /usr/local/lib/python3.8/site-packages/
import sys
sys.path = ['/usr/local/lib/python3.8/site-packages', '/shared-libs/python3.8/py-core/lib/python3.8/site-packages']
import pandas as pd


# Experiment grid: one prediction-results file exists per (model, seed, domain).
models = [
    "dataset-embeddings",
    "special-token",
    "baseline",
]
domains = [
    "ai",
    "literature",
    "news",
    "politics",
    "science",
    "music",
]
seeds = [
    "4012",
    "5096",
    "8878",
    "8857",
    "9908",
]

M = len(domains)

# Two-level auto-vivifying mapping, indexed as data[model][seed][domain];
# each leaf is the DataFrame parsed from the corresponding JSON file.
data = defaultdict(partial(defaultdict, dict))
for model in models:
    for seed in seeds:
        for domain in domains:
            data[model][seed][domain] = pd.read_json(f"results/{model}/rs{seed}/{domain}-test-pred-results.json")

# Convenience handles on the per-model sub-mappings.
baseline, special_token, dataset_embeddings = (
    data[key] for key in ("baseline", "special-token", "dataset-embeddings")
)

Collecting pandas
  Using cached pandas-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
Collecting jinja2
  Using cached Jinja2-3.1.2-py3-none-any.whl (133 kB)
Collecting numpy>=1.20.3
  Using cached numpy-1.24.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting tzdata>=2022.1
  Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)
Collecting pytz>=2020.1
  Using cached pytz-2023.3-py2.py3-none-any.whl (502 kB)
Collecting python-dateutil>=2.8.2
  Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB)
Collecting MarkupSafe>=2.0
  Using cached MarkupSafe-2.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)
Collecting six>=1.5
  Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)
Installing collected packages: pytz, tzdata, six, numpy, MarkupSafe, python-dateutil, jinja2, pandas
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour i

In [23]:
# LaTeX header cell for every domain column, plus the bold "avg." column.
domain_to_faicon = dict(
    ai="\\faRobot",
    music="\\faMusic",
    literature="\\faBookOpen",
    science="\\faLeaf",
    news="\\faNewspaper",
    politics="\\faLandmark",
    avg="\\textbf{avg.}",
)
model_names = ["Baseline", "Special tokens", "Dataset embeddings"]
f1_types = ['micro avg', 'macro avg', "weighted avg"]

# results[model_name][domain][f1_type] -> F1 averaged over all seeds;
# the extra "avg" entry holds the mean of those per-domain values.
results = {}
for model_name, model in zip(model_names, [baseline, special_token, dataset_embeddings]):
    per_domain = {}
    for domain in domains:
        # Mean over seeds of the "f1-score" entry for each averaging scheme.
        per_domain[domain] = {
            f1: sum(model[seed][domain][f1]["f1-score"] for seed in seeds) / len(seeds)
            for f1 in f1_types
        }
    # Unweighted mean across domains, computed from the per-domain means above.
    per_domain["avg"] = {
        f1: sum([per_domain[domain][f1] for domain in domains]) / len(domains)
        for f1 in f1_types
    }
    results[model_name] = per_domain

In [39]:
# Small-caps row labels for the three F1 variants, in the same order as `f1_types`.
# All three use a capital "F" so they render consistently under \textsc.
f1_types_sc = [r"\textsc{Micro-F1}", r"\textsc{Macro-F1}", r"\textsc{Weigh.-F1}"]
# Two parallel label lists that pandas turns into a two-level row index.
# Repeat counts are derived from the list lengths so the index stays aligned
# with the data if a model or metric is added or removed.
rows = [
    # Outer level: each metric label repeated once per model.
    list(chain(*([f1] * len(model_names) for f1 in f1_types_sc))),
    # Inner level: the model labels cycled once per metric.
    list(map(lambda x: fr"\textsc{{{x}}}", model_names)) * len(f1_types_sc),
]
# One value list per table column (each domain plus the average), keyed by its LaTeX header.
columns = {domain_to_faicon[domain]: [] for domain in domains + ['avg']}
# Fill metric-major, then model, so the values line up with the `rows` labels.
for f1 in f1_types:
    for model in model_names:
        for domain, scores in results[model].items():
            columns[domain_to_faicon[domain]].append(scores[f1])
df = pd.DataFrame(columns, index=rows)

In [40]:
# Emit the table as LaTeX source; scores are fractions, so they are scaled to
# percentages and printed with two decimals.
print(
    df.to_latex(
        float_format=lambda x: f"{x*100:.2f}",
    )
)

\begin{tabular}{llrrrrrrr}
\toprule
 &  & \faRobot & \faBookOpen & \faNewspaper & \faLandmark & \faLeaf & \faMusic & \textbf{avg.} \\
\midrule
\multirow[t]{3}{*}{\textsc{Micro-F1}} & \textsc{Baseline} & 51.67 & 70.90 & 54.75 & 61.36 & 47.46 & 76.21 & 60.39 \\
 & \textsc{Special tokens} & 49.97 & 70.60 & 54.14 & 61.84 & 47.74 & 76.17 & 60.08 \\
 & \textsc{Dataset embeddings} & 18.69 & 19.03 & 58.54 & 42.23 & 17.03 & 30.43 & 30.99 \\
\cline{1-9}
\multirow[t]{3}{*}{\textsc{Macro-f1}} & \textsc{Baseline} & 39.53 & 42.10 & 14.39 & 27.71 & 38.21 & 38.77 & 33.45 \\
 & \textsc{Special tokens} & 39.67 & 42.65 & 16.06 & 30.19 & 40.34 & 40.50 & 34.90 \\
 & \textsc{Dataset embeddings} & 1.88 & 1.90 & 12.73 & 3.68 & 1.72 & 2.78 & 4.12 \\
\cline{1-9}
\multirow[t]{3}{*}{\textsc{Weigh.-F1}} & \textsc{Baseline} & 48.81 & 66.08 & 48.97 & 57.55 & 45.48 & 72.35 & 56.54 \\
 & \textsc{Special tokens} & 47.43 & 66.45 & 49.30 & 58.44 & 45.48 & 72.79 & 56.65 \\
 & \textsc{Dataset embeddings} & 5.87 & 6.08 & 49

In [5]:
# Inspect the raw per-column value lists that feed the results DataFrame.
# NOTE(review): the recorded output of this cell carries execution count 5, older
# than the cell that currently builds `columns` (39), and its values are ordered
# model-first rather than metric-first — re-run the notebook to refresh it.
columns

{'\\faRobot': [0.5166812227074232,
  0.3952659928659104,
  0.4881182154259623,
  0.499737991266375,
  0.3967369433247557,
  0.47428309064524693,
  0.18689956331877702,
  0.018774400140369,
  0.058728519097733],
 '\\faBookOpen': [0.7089663760896633,
  0.4209886458667298,
  0.6608287822317773,
  0.7059775840597753,
  0.4265481627856881,
  0.6645195325364994,
  0.1902864259028642,
  0.018963108357698,
  0.0608059069614138],
 '\\faNewspaper': [0.547474747474747,
  0.1439476189460684,
  0.48974429965270516,
  0.5414141414141408,
  0.1606134164161212,
  0.49297127928308593,
  0.585353535353535,
  0.1273242789808344,
  0.4954593210036412],
 '\\faLandmark': [0.6136030341340073,
  0.2770932063025916,
  0.5755199848692574,
  0.618356510745891,
  0.3019009369432572,
  0.5843759639743215,
  0.4222503160556251,
  0.036847447155906,
  0.24625683256972403],
 '\\faLeaf': [0.47458583010222044,
  0.3820595112070718,
  0.45479626450439686,
  0.4774057102573135,
  0.4033726541580405,
  0.4548113174464475,

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=6c3be415-6b72-4ca4-8444-c76a32e3ce00' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>