# Balanced Language Sampling for Multilingual Models

This Jupyter notebook visualizes the results of the experiments described in the term paper «Balanced Language Sampling for Multilingual Models».

## Requirements

In this section we download and import all needed modules and libraries.

In [None]:
%pip install -U kaleido

In [None]:
%pip install pyconll

In [None]:
import kaleido
import os
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
import os
import pyconll
from scipy.stats import mannwhitneyu

## Metrics

In this section we provide functions that are used as metrics for the future visualization.

In [None]:
def roleCounter(path: str, targetRoles: list[str]) -> dict[str, int]:
    """
    Counts how often each of the target roles occurs in the provided data.

    The file is read as comma-separated text. The first line is skipped as
    a header, and the role is taken from the second field of every other
    line. Blank lines (e.g. a trailing newline at the end of the file) are
    ignored.

    Arguments:
        path : str
            The path to the file where results of one experiment are stored.
        targetRoles : list[str]
            The list of target arguments, that function is going to count in data.

    Return:
        dict[str, int]:
            The dictionary with counted statistics. Roles from targetRoles
            that never occur in the file are absent from the dictionary.
    """
    stats = {}

    # NOTE(review): the result files are assumed to be UTF-8 encoded — confirm.
    with open(path, "r", encoding="utf-8") as results:
        next(results, None)  # skip the header line (no-op for an empty file)

        for line in results:
            if not line.strip():
                # A blank line has no second field; skip it instead of crashing.
                continue

            role = line.split(",")[1]
            if role in targetRoles:
                stats[role] = stats.get(role, 0) + 1

    return stats


In [None]:
def layerAccuracy(path: str) -> dict[int, float]:
    """
    Calculates the accuracy for A-O classification per layer.

    The file is read as comma-separated text with a header line. For every
    other line the gold role is the second field, the predicted role is the
    fifth field from the end and the layer is the third field from the end.
    Only lines whose gold role is A or O are taken into account.

    Arguments:
        path : str
            The path to the file where results of one experiment are stored.

    Return:
        dict[int, float]:
            The dictionary with calculated accuracy per layer (layers 0-12).
            A layer without any A/O example gets the accuracy 0.0, the same
            convention roleClassProportion uses for empty layers.
    """
    # layer -> [number of correct predictions, number of A/O examples]
    layers = {i: [0, 0] for i in range(13)}

    # NOTE(review): the result files are assumed to be UTF-8 encoded — confirm.
    with open(path, "r", encoding="utf-8") as results:
        next(results, None)  # skip the header line

        for line in results:
            if not line.strip():
                continue  # tolerate blank / trailing lines

            fields = line.split(",")
            role, predicted_role, layer = fields[1], fields[-5], fields[-3]

            if role in ("A", "O"):
                counts = layers[int(layer)]
                if role == predicted_role:
                    counts[0] += 1
                counts[1] += 1

    # Build a new dictionary instead of overwriting the counters in place;
    # guard against layers that received no example at all.
    return {
        layer: (correct / total if total else 0.0)
        for layer, (correct, total) in layers.items()
    }


In [None]:
def roleClassProportion(path: str, target_role: str) -> dict[str, list[float]]:
    """
    Calculates the proportion of target role classified as A or O per layer.

    The file is read as comma-separated text with a header line. For every
    other line the gold role is the second field, the predicted role is the
    fifth field from the end and the layer is the third field from the end.

    Arguments:
        path : str
            The path to the file where results of one experiment are stored.
        target_role : str
            The role for which proportion is calculated.

    Return:
        dict[str, list[float]]:
            The dictionary with two lists, "A" and "O". The i-th element of
            each list is the proportion of target role examples classified
            as A (or O) on layer i, for layers 0-12. Layers without any
            example of the target role get 0.0 in both lists.
    """
    layers = {i: {"A": 0, "O": 0} for i in range(13)}

    # NOTE(review): the result files are assumed to be UTF-8 encoded — confirm.
    with open(path, "r", encoding="utf-8") as results:
        next(results, None)  # skip the header line

        for line in results:
            if not line.strip():
                continue  # tolerate blank / trailing lines

            fields = line.split(",")
            role, predicted_role, layer = fields[1], fields[-5], fields[-3]

            if role == target_role:
                # A predicted role other than "A"/"O" raises KeyError here.
                layers[int(layer)][predicted_role] += 1

    proportion = {"A": [], "O": []}
    for counts in layers.values():
        overall = counts["A"] + counts["O"]
        for label in ("A", "O"):
            proportion[label].append(counts[label] / overall if overall else 0.0)

    return proportion


In [None]:
def roleClassProportionAnimacy(
    path: str, target_role: str
) -> dict[str, list[str | int]]:
    """
    Calculates the proporion of target role classified as A or O per layer
    with animacy distinction.

    Arguments:
        path : str
            The path to the file where results of one experiment are stored.
        target_role : str
            The role for which proportion is calculated.

    Return:
       dict[str, list[str | int]]:
            The dictionary with calculated proportion of target argument classified as A or O per layer with animacy distinction.
    """

    animacyStats = {
        "role": ["A"] * 3 + ["O"] * 3,
        "animacy": ["Animate", "Inanimate", "None"] * 2,
        "cnt": [0] * 6,
    }

    with open(path, "r") as results:
        results = results.readlines()

    for line in results[1:]:
        role, animacy, predicted_role, layer = (
            line.split(",")[1],
            line.split(",")[3],
            line.split(",")[-5],
            line.split(",")[-3]
        )

        if role == target_role and predicted_role == "A":
            if animacy == "Anim":
                animacyStats["cnt"][0] += 1
            elif animacy == "Inan":
                animacyStats["cnt"][1] += 1
            else:
                animacyStats["cnt"][2] += 1

        if role == target_role and predicted_role == "O":
            if animacy == "Anim":
                animacyStats["cnt"][3] += 1
            elif animacy == "Inan":
                animacyStats["cnt"][4] += 1
            else:
                animacyStats["cnt"][5] += 1

    return animacyStats


## Loading Data

In this section we download data from the repository.

In [None]:
# Clone the repository with the experiment results unless a directory with
# that name already exists in the current working directory.
# NOTE(review): this check uses a relative path, while the result paths used
# later in the notebook are absolute (/content/...), so the notebook presumably
# expects /content to be the working directory (e.g. Google Colab) — confirm.
if not os.path.exists("BalancedLanguageSampling"):
    !git clone https://github.com/veronikatsareva/BalancedLanguageSampling

In [None]:
# Directory for the exported figures; exist_ok makes the cell safe to re-run
# without a separate (racy) existence check.
os.makedirs("Figures", exist_ok=True)

## Experiment №1

In this section we provide visualization of the results that we got during the experiment №1. At first, we deal with results of the original sample and then with the results of the balanced sample.

For more details, see Section 3.3 of the term paper.

### Original Sample

The dictionary where the key is the language and the value is the path to the results for this language.

In [None]:
# Directory that holds one sub-directory of results per language.
originalResultsRoot = "/content/BalancedLanguageSampling/results/original sample"

# Language -> treebank prefix that appears (twice) in the name of the result file.
originalTreebanks = {
    "Basque": "eu_bdt",
    "Chinese": "zh_gsd",
    "Croatian": "hr_set",
    "Czech": "cs_pdt",
    "English": "en_ewt",
    "Estonian": "et_edt",
    "Finnish": "fi_tdt",
    "French": "fr_gsd",
    "German": "de_gsd",
    "Hebrew": "he_htb",
    "Hindi": "hi_hdtb",
    "Indonesian": "id_gsd",
    "Latin": "la_ittb",
    "Norwegian": "no_bokmaal",
    "Persian": "fa_seraji",
    "Polish": "pl_pdb",
    "Russian": "ru_syntagrus",
    "Serbian": "sr_set",
    "Slovak": "sk_snk",
    "Slovenian": "sl_ssj",
    "Spanish": "es_ancora",
    "Urdu": "ur_udtb",
}

# Language -> path to the results for this language. Every file follows the
# same pattern: <root>/<language>/<treebank>-ud_<treebank>-ud-test.conllu_0
originalDir = {
    language: (
        f"{originalResultsRoot}/{language}/"
        f"{treebank}-ud_{treebank}-ud-test.conllu_0"
    )
    for language, treebank in originalTreebanks.items()
}

Here we create a dataframe with the number of occurrences of each core argument of the clause (S, O and A) in each language.

In [None]:
# Language -> {role: count} for the three core arguments. Built in one pass;
# there is no need to pre-fill the dictionary with empty placeholders.
roleStatsOrig = {
    language: roleCounter(path, ["S", "A", "O"])
    for language, path in originalDir.items()
}

# Columns are languages, rows are roles.
roleStatsOrig_df = pd.DataFrame(roleStatsOrig)
roleStatsOrig_df

Here we calculate the accuracy in A-O classification per layer for each language.

In [None]:
# Language -> list of per-layer accuracies (in the order layerAccuracy
# returns the layers).
accuracyOrig = {
    language: list(layerAccuracy(resultsPath).values())
    for language, resultsPath in originalDir.items()
}

# One column per language, one row per layer.
accuracyOrig_df = pd.DataFrame.from_dict(accuracyOrig)

And visualize the results.

In [None]:
# One panel per language, laid out on an 11 x 2 grid (22 languages).
fig = make_subplots(rows=11, cols=2, subplot_titles=list(originalDir))

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

for cnt, language in enumerate(originalDir):
    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    fig.add_trace(
        go.Scatter(x=layerIds, y=accuracyOrig_df[language], showlegend=False),
        row=gridRow + 1,
        col=gridCol + 1,
    )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Language",
    template="ggplot2",
    title_text="Accuracy for A-O classification per layer and language (Original Sample)",
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Accuracy")

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, "Figures/Accuracy for A-O classification per layer and language (Original Sample).png",scale=6, width=800, height=2000)

fig.show()


In [None]:
# Marker colour for every language of the original sample. Several languages
# share a colour; judging by the variable name these are presumably languages
# of the same genus — confirm.
genusColours = dict(
    Basque="rgb(160,227,183)",
    Chinese="rgb(30,92,74)",
    Croatian="rgb(26,167,238)",
    Czech="rgb(26,167,238)",
    English="rgb(102,155,144)",
    Estonian="rgb(85,241,123)",
    Finnish="rgb(85,241,123)",
    French="rgb(253,4,143)",
    German="rgb(102,155,144)",
    Hebrew="rgb(161,78,124)",
    Hindi="rgb(45,81,146)",
    Indonesian="rgb(242,131,227)",
    Latin="rgb(81,62,180)",
    Norwegian="rgb(102,155,144)",
    Persian="rgb(188,175,249)",
    Polish="rgb(26,167,238)",
    Russian="rgb(26,167,238)",
    Serbian="rgb(26,167,238)",
    Slovak="rgb(26,167,238)",
    Slovenian="rgb(26,167,238)",
    Spanish="rgb(253,4,143)",
    Urdu="rgb(45,81,146)",
)

# All languages in a single scatter plot: x is the row index of the dataframe
# (the layer), y is the accuracy, one trace per column (language).
fig = px.scatter(
    accuracyOrig_df,
    color_discrete_map=genusColours,
    template="ggplot2",
)

fig.update_layout(
    title="Accuracy for A-O classification per layer (Original Sample, overall)",
    legend_title_text="Language",
    font_family="Brill",
    title_font_family="Brill",
    font_color="black",
)

fig.update_yaxes(title_text="Accuracy")
fig.update_xaxes(title_text="Layers")

# pio.write_image(fig, "Figures/Accuracy for A-O classification per layer (Original Sample, overall).png",scale=6, width=1000, height=700)

fig.show()

The highest accuracy for each language and layer on which it was achieved.

In [None]:
# For every language: the best accuracy and the position (layer) of the row
# in which it occurs.
highestAccuracyOrig = {
    "Value": [accuracyOrig_df[column].max() for column in accuracyOrig_df.columns],
    "Layer": [
        int(accuracyOrig_df[column].argmax()) for column in accuracyOrig_df.columns
    ],
}

highestAccuracyOrigdf = pd.DataFrame(
    highestAccuracyOrig,
    index=accuracyOrig_df.columns,
)
highestAccuracyOrigdf

The mean accuracy for each language.

In [None]:
# Column-wise mean: the accuracy of every language averaged over all layers.
accuracyOrig_df.mean()

Here we calculate what was more often predicted for S argument.

In [None]:
# One panel per language, laid out on an 11 x 2 grid (22 languages).
fig = make_subplots(rows=11, cols=2, subplot_titles=list(originalDir))

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

# Predicted role -> line colour; one trace per predicted role and panel.
roleColours = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(originalDir):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(originalDir[language], "S"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    for role, colour in roleColours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layerIds,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=2500,
    width=1000,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S argument classified as A and O (Original sample)",
)
fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, 'Figures/The proportion of S argument classified as A and O (Original sample).png',scale=6, width=1000, height=2500)

fig.show()


Here we calculate the amount of S argument predicted as A or O with marking the animacy of S.

In [None]:
# One panel per language, laid out on an 11 x 2 grid (22 languages).
fig = make_subplots(rows=11, cols=2, subplot_titles=list(originalDir))

# Predicted role -> bar colour. Defined once: it does not depend on the language.
colors = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(originalDir):
    lang_data = pd.DataFrame(roleClassProportionAnimacy(originalDir[language], "S"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    # One group of bars (Animate / Inanimate / None) per predicted role.
    for y in lang_data.role.unique():
        dfy = lang_data[lang_data.role == y]

        fig.add_trace(
            go.Bar(
                name=str(y),
                x=dfy.animacy,
                y=dfy.cnt,
                marker=dict(color=colors[str(y)]),
                legendgroup=str(y),
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The amount of S argument classified as A and O with animacy distinction (Original sample)",
)

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, 'Figures/The amount of S argument classified as A and O with animacy distinction (Original sample).png',scale=6, width=800, height=2000)

fig.show()


Here we calculate what was more often predicted for S-passive.

In [None]:
# One panel per language, laid out on an 11 x 2 grid (22 languages).
fig = make_subplots(rows=11, cols=2, subplot_titles=list(originalDir))

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

# Predicted role -> line colour; one trace per predicted role and panel.
roleColours = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(originalDir):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(originalDir[language], "S-passive"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    for role, colour in roleColours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layerIds,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S-passive classified as A and O (Original sample)",
)
fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, "Figures/The proportion of S-passive classified as A and O (Original sample).png",scale=6, width=800, height=2000)

fig.show()


In [None]:
# Languages shown in this reduced version of the S-passive figure. The list is
# defined once so that the panel titles and the plotted data cannot diverge.
passiveLanguages = [
    "Chinese", "Czech", "English", "French", "German", "Hindi",
    "Indonesian", "Latin", "Persian", "Russian", "Slovak", "Spanish",
]

# One panel per selected language, laid out on a 6 x 2 grid.
fig = make_subplots(rows=6, cols=2, subplot_titles=passiveLanguages)

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

# Predicted role -> line colour; one trace per predicted role and panel.
roleColours = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(passiveLanguages):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(originalDir[language], "S-passive"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    for role, colour in roleColours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layerIds,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=1200,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S-passive classified as A and O (Original sample)",
)
fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# Uncomment to export the figure as a PNG file.
# NOTE(review): the file name is the same as the one used for the S-passive
# figure with all languages, so exporting both overwrites one of them.
# pio.write_image(fig, "Figures/The proportion of S-passive classified as A and O (Original sample).png",scale=6, width=800, height=1200)

fig.show()


### Balanced Sample

The dictionary where the key is the language and the value is the path to the results for this language.

In [None]:
# Directory that holds one sub-directory of results per language.
balancedResultsRoot = "/content/BalancedLanguageSampling/results/balanced sample"

# Language -> treebank prefix that appears (twice) in the name of the result file.
balancedTreebanks = {
    "Afrikaans": "af_afribooms",
    "Arabic": "ar_nyuad",
    "Armenian": "hy_armtdp",
    "Basque": "eu_bdt",
    "Chinese": "zh_gsd",
    "Finnish": "fi_tdt",
    "Georgian": "ka_glc",
    "Greek": "el_gdt",
    "Hindi": "hi_hdtb",
    "Hungarian": "hu_szeged",
    "Indonesian": "id_gsd",
    "Japanese": "ja_gsd",
    "Korean": "ko_kaist",
    "Latin": "la_ittb",
    "Persian": "fa_perdt",
    "Portuguese": "pt_cintil",
    "Russian": "ru_syntagrus",
    "Turkish": "tr_penn",
    "Vietnamese": "vi_vtb",
}

# Language -> path to the results for this language. Every file follows the
# same pattern: <root>/<language>/<treebank>-ud_<treebank>-ud-test.conllu_0
balancedDir = {
    language: (
        f"{balancedResultsRoot}/{language}/"
        f"{treebank}-ud_{treebank}-ud-test.conllu_0"
    )
    for language, treebank in balancedTreebanks.items()
}


Here we create a dataframe with the number of occurrences of each core argument (S, O and A) in each language.

In [None]:
# Language -> {role: count} for the three core arguments. Built in one pass;
# there is no need to pre-fill the dictionary with empty placeholders.
roleStatsBalanced = {
    language: roleCounter(path, ["S", "A", "O"])
    for language, path in balancedDir.items()
}

# Columns are languages, rows are roles.
roleStatsBalanced_df = pd.DataFrame(roleStatsBalanced)
roleStatsBalanced_df

Here we calculate the accuracy in A-O classification per layer for each language.

In [None]:
# Language -> list of per-layer accuracies (in the order layerAccuracy
# returns the layers).
accuracyBalanced = {
    language: list(layerAccuracy(resultsPath).values())
    for language, resultsPath in balancedDir.items()
}

# One column per language, one row per layer.
accuracyBalanced_df = pd.DataFrame.from_dict(accuracyBalanced)

And visualize the results.

In [None]:
# One panel per language, laid out on a 10 x 2 grid (19 languages).
fig = make_subplots(rows=10, cols=2, subplot_titles=list(balancedDir))

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

for cnt, language in enumerate(balancedDir):
    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    fig.add_trace(
        go.Scatter(x=layerIds, y=accuracyBalanced_df[language], showlegend=False),
        row=gridRow + 1,
        col=gridCol + 1,
    )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Language",
    template="ggplot2",
    title_text="Accuracy for A-O classification per layer and language (Balanced Sample)",
)
fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Accuracy")

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, "Figures/Accuracy for A-O classification per layer and language (Balanced Sample).png",scale=6, width=800, height=2000)

fig.show()


In [None]:
# All languages of the balanced sample in a single scatter plot: one trace per
# dataframe column (language), the dataframe index (layer) on the x axis.
fig = px.scatter(accuracyBalanced_df, template="ggplot2")

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Accuracy")

fig.update_layout(
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text='Language',
    title = "Accuracy for A-O classification per layer (Balanced Sample, overall)"
)

# Uncomment to export the figure as a PNG file.
# NOTE(review): the ".png" extension was missing from this path, unlike every
# other export in the notebook; it is added here.
# pio.write_image(fig, 'Figures/Accuracy for A-O classification per layer (Balanced Sample, overall).png',scale=6, width=1000, height=700)

fig.show()

The highest accuracy for each language and layer on which it was achieved.

In [None]:
# For every language: the best accuracy and the position (layer) of the row
# in which it occurs.
highestAccuracyBalanced = {
    "Value": [
        accuracyBalanced_df[column].max() for column in accuracyBalanced_df.columns
    ],
    "Layer": [
        int(accuracyBalanced_df[column].argmax())
        for column in accuracyBalanced_df.columns
    ],
}

highestAccuracyBalanceddf = pd.DataFrame(
    highestAccuracyBalanced,
    index=accuracyBalanced_df.columns,
)
highestAccuracyBalanceddf

The mean accuracy for each language.

In [None]:
# Column-wise mean: the accuracy of every language averaged over all layers.
accuracyBalanced_df.mean()

Here we calculate what was more often predicted for the S argument.

In [None]:
# One panel per language, laid out on a 10 x 2 grid (19 languages).
fig = make_subplots(rows=10, cols=2, subplot_titles=list(balancedDir))

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

# Predicted role -> line colour; one trace per predicted role and panel.
roleColours = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(balancedDir):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(balancedDir[language], "S"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    for role, colour in roleColours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layerIds,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S argument classified as A and O (Balanced sample)",
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, 'Figures/The proportion of S argument classified as A and O (Balanced sample).png',scale=6, width=800, height=2000)

fig.show()


Here we calculate the amount of S argument predicted as A or O with marking the animacy of S.

In [None]:
# One panel per language, laid out on a 10 x 2 grid (19 languages).
fig = make_subplots(rows=10, cols=2, subplot_titles=list(balancedDir))

# Predicted role -> bar colour. Defined once: it does not depend on the language.
colors = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(balancedDir):
    lang_data = pd.DataFrame(roleClassProportionAnimacy(balancedDir[language], "S"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    # One group of bars (Animate / Inanimate / None) per predicted role.
    for y in lang_data.role.unique():
        dfy = lang_data[lang_data.role == y]

        fig.add_trace(
            go.Bar(
                name=str(y),
                x=dfy.animacy,
                y=dfy.cnt,
                marker=dict(color=colors[str(y)]),
                legendgroup=str(y),
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The amount of S argument classified as A and O with animacy distinction (Balanced sample)",
)

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, 'Figures/The amount of S argument classified as A and O with animacy distinction (Balanced sample).png',scale=6, width=800, height=2000)

fig.show()


Here we calculate what was more often predicted for S-passive.

In [None]:
# One panel per language, laid out on a 10 x 2 grid (19 languages).
fig = make_subplots(rows=10, cols=2, subplot_titles=list(balancedDir))

# The x axis is the layer index 0..12, shared by every panel.
layerIds = list(range(13))

# Predicted role -> line colour; one trace per predicted role and panel.
roleColours = {"A": "green", "O": "goldenrod"}

for cnt, language in enumerate(balancedDir):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(balancedDir[language], "S-passive"))

    # Fill the grid row by row: divmod yields the zero-based (row, column).
    gridRow, gridCol = divmod(cnt, 2)

    for role, colour in roleColours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layerIds,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                showlegend=False,
            ),
            row=gridRow + 1,
            col=gridCol + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S-passive classified as A and O (Balanced sample)",
)
fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, 'Figures/The proportion of S-passive classified as A and O (Balanced sample).png',scale=6, width=800, height=2000)

fig.show()


### Comparing two samples

In this section we compare the mean accuracy for each layer in two samples.

In [None]:
# Row-wise means: for every layer, the accuracy averaged over all languages of
# the sample. The two series are aligned on their index (the layer).
meanAccuracyComp = pd.DataFrame(
    {
        "Original sample": accuracyOrig_df.mean(axis=1),
        "Balanced sample": accuracyBalanced_df.mean(axis=1),
    }
)

In [None]:
# One box per sample, built from the per-layer mean accuracies.
sampleColours = {"Original sample": "orangered", "Balanced sample": "olive"}

fig = go.Figure()
for sampleName, boxColour in sampleColours.items():
    fig.add_trace(
        go.Box(
            y=meanAccuracyComp[sampleName],
            name=sampleName,
            marker_color=boxColour,
        )
    )

fig.update_layout(
    title="Boxplot for mean accuracy per layer (Original vs. Balanced Sample)",
    template="ggplot2",
    font_family="Brill",
    title_font_family="Brill",
    font_color="black",
    legend_title_text="Language",
    showlegend=False,
)

fig.update_yaxes(title_text="Accuracy")

# pio.write_image(fig, 'Figures/Boxplot for mean accuracy per layer (Original vs. Balanced Sample).png',scale=6, width=1000, height=500)

fig.show()

In [None]:
# Mean accuracy per layer for both samples in one scatter plot: one trace per
# dataframe column, the dataframe index (layer) on the x axis.
fig = px.scatter(
    meanAccuracyComp,
    template="ggplot2",
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Mean accuracy")

fig.update_layout(
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    # The traces are the two samples, not languages, so the legend is titled
    # accordingly.
    legend_title_text="Sample",
    title="Mean accuracy per layer (Original vs. Balanced Sample)",
)

# Uncomment to export the figure as a PNG file:
# pio.write_image(fig, 'Figures/Mean accuracy per layer (Original vs. Balanced Sample).png',scale=6, width=1000, height=500)

fig.show()


Here we use the Mann-Whitney U-test to examine whether the distribution of the accuracy in the original sample differs from the balanced sample.

In [None]:
# Mann-Whitney U test on the two columns of per-layer mean accuracies
# (one value per layer for each sample); the result is displayed as the cell output.
mannwhitneyu(meanAccuracyComp["Original sample"], meanAccuracyComp["Balanced sample"])

## Experiment №2

In this section we provide visualization of the results that we got during the experiment №2. At first we deal with results of the original sample and then with the results of the balanced sample.

For more details, see Section 3.4 of the term paper.

### Original Sample

The dictionary where the key is the path to the results for a pair of Basque (the training language) and a testing language, and the value is the name of the testing language.

In [None]:
# Directory that holds one sub-directory of results per testing language.
basqueResultsRoot = "/content/BalancedLanguageSampling/results/original sample"

# Testing language -> treebank prefix used in the name of the result file.
# The order of the entries determines the order of the subplots.
basqueTestTreebanks = {
    "Persian": "fa_seraji",
    "Slovak": "sk_snk",
    "German": "de_gsd",
    "Russian": "ru_syntagrus",
    "Basque": "eu_bdt",
    "Chinese": "zh_gsd",
    "French": "fr_gsd",
    "English": "en_ewt",
    "Polish": "pl_pdb",
    "Hindi": "hi_hdtb",
    "Latin": "la_ittb",
    "Croatian": "hr_set",
    "Hebrew": "he_htb",
    "Indonesian": "id_gsd",
    "Urdu": "ur_udtb",
    "Czech": "cs_pdt",
    "Slovenian": "sl_ssj",
    "Spanish": "es_ancora",
    "Serbian": "sr_set",
    "Norwegian": "no_bokmaal",
    "Finnish": "fi_tdt",
    "Estonian": "et_edt",
    "Japanese": "ja_gsd",
}

# Path to the results -> testing language. Every file follows the pattern
# <root>/<language>/eu_bdt-ud_<treebank>-ud-<split>.conllu_0, where the split
# is "train" for every language except Basque itself, whose file is the
# "test" one.
basqueDataOrig = {
    (
        f"{basqueResultsRoot}/{language}/"
        f"eu_bdt-ud_{treebank}-ud-{'test' if language == 'Basque' else 'train'}.conllu_0"
    ): language
    for language, treebank in basqueTestTreebanks.items()
}


Here we calculate the accuracy in A-O classification per layer for each pair.

In [None]:
# Testing language -> list of per-layer accuracies. basqueDataOrig maps
# result paths to language names, so the path is the key here.
accuracyBasqOrig = {
    testLanguage: list(layerAccuracy(resultsPath).values())
    for resultsPath, testLanguage in basqueDataOrig.items()
}

# One column per testing language, one row per layer.
accuracyBasqOrig_df = pd.DataFrame.from_dict(accuracyBasqOrig)

And visualize the results.

In [None]:
# Draw one accuracy-per-layer curve per testing language on a two-column grid.
n_cols = 2
# Ceiling division: just enough rows to give every language its own subplot.
n_rows = (len(basqueDataOrig) + n_cols - 1) // n_cols

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(basqueDataOrig.values()))

layers = list(range(13))

for position, name in enumerate(basqueDataOrig.values()):
    # Fill the grid row by row: position 0 -> (1, 1), 1 -> (1, 2), 2 -> (2, 1), ...
    grid_row, grid_col = divmod(position, n_cols)

    fig.add_trace(
        go.Scatter(x=layers, y=accuracyBasqOrig_df[name], showlegend=False),
        row=grid_row + 1,
        col=grid_col + 1,
    )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    template="ggplot2",
    title_text="Accuracy for A-O classification per layer (trained on Basque, Original sample)"
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Accuracy")

# pio.write_image(fig, 'Figures/Accuracy for A-O classification per layer (trained on Basque, Original sample).png',scale=6, width=800, height=2000)

fig.show()


In [None]:
# Plot colour for each language. Languages that share a colour are listed
# together (the variable name suggests one colour per genus); the resulting
# dictionary is ordered alphabetically by language name.
genusColours = dict(
    sorted(
        (language, colour)
        for colour, languages in (
            ("rgb(160,227,183)", ("Basque",)),
            ("rgb(30,92,74)", ("Chinese",)),
            (
                "rgb(26,167,238)",
                ("Croatian", "Czech", "Polish", "Russian", "Serbian", "Slovak", "Slovenian"),
            ),
            ("rgb(102,155,144)", ("English", "German", "Norwegian")),
            ("rgb(85,241,123)", ("Estonian", "Finnish")),
            ("rgb(253,4,143)", ("French", "Spanish")),
            ("rgb(161,78,124)", ("Hebrew",)),
            ("rgb(45,81,146)", ("Hindi", "Urdu")),
            ("rgb(242,131,227)", ("Indonesian",)),
            ("rgb(81,62,180)", ("Latin",)),
            ("rgb(188,175,249)", ("Persian",)),
        )
        for language in languages
    )
)


# Overlay the accuracy curves of all testing languages in a single scatter plot;
# the colour of each language is looked up in genusColours.
fig = px.scatter(
    accuracyBasqOrig_df,
    color_discrete_map=genusColours,
    template="ggplot2",
)

fig.update_layout(
    title="Accuracy for A-O classification per layer (trained on Basque, Original sample, overall)",
    legend_title_text="Language",
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
)

fig.update_yaxes(title_text="Accuracy")
fig.update_xaxes(title_text="Layers")

# pio.write_image(fig, 'Figures/Accuracy for A-O classification per layer (trained on Basque, Original sample, overall).png',scale=6, width=1000, height=700)

fig.show()

The highest accuracy for each testing language and the layer on which it was achieved.

In [None]:
# For every testing language: the best accuracy and the position of the layer
# (row of accuracyBasqOrig_df) on which it was reached.
tested_languages = accuracyBasqOrig_df.columns

highestAccuracyBasq = {
    "Value": [accuracyBasqOrig_df[name].max() for name in tested_languages],
    "Layer": [int(accuracyBasqOrig_df[name].argmax()) for name in tested_languages],
}

highestAccuracyBasqdf = pd.DataFrame(highestAccuracyBasq, index=tested_languages)
highestAccuracyBasqdf

The mean accuracy for each language.

In [None]:
# Column-wise mean: one value per testing language, averaged over all layers.
accuracyBasqOrig_df.mean(axis=0)

Here we calculate how often the S argument was classified as A and how often as O on each layer.

In [None]:
# For every testing language, plot per layer how the S arguments were split
# between the A and the O class (two-column grid, one subplot per language).
n_cols = 2
# Ceiling division: just enough rows to give every language its own subplot.
n_rows = (len(basqueDataOrig) + n_cols - 1) // n_cols

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(basqueDataOrig.values()))

role_colours = {"A": "green", "O": "goldenrod"}
layers = list(range(13))

# The keys of basqueDataOrig are the result-file paths.
for position, results_path in enumerate(basqueDataOrig):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(results_path, "S"))
    grid_row, grid_col = divmod(position, n_cols)

    for role, colour in role_colours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layers,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                # Only the first subplot contributes legend entries, so that the
                # legend explains the colours exactly once per role.
                showlegend=position == 0,
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S argument classified as A and O (trained on Basque, Original sample)"
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# pio.write_image(fig, 'Figures/The proportion of S argument classified as A and O (trained on Basque, Original sample).png',scale=6, width=1000, height=2500)

fig.show()


Here we calculate the amount of S arguments predicted as A or O, broken down by the animacy of S.

In [None]:
# For every testing language, draw bars with the number of S arguments
# classified as A and as O for each animacy value.
n_cols = 2
# Ceiling division: just enough rows to give every language its own subplot.
n_rows = (len(basqueDataOrig) + n_cols - 1) // n_cols

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(basqueDataOrig.values()))

colors = {"A": "green", "O": "goldenrod"}
# Roles that already own a legend entry; each role is listed only once.
roles_in_legend = set()

# The keys of basqueDataOrig are the result-file paths.
for position, results_path in enumerate(basqueDataOrig):
    lang_data = pd.DataFrame(roleClassProportionAnimacy(results_path, "S"))
    grid_row, grid_col = divmod(position, n_cols)

    for role in lang_data.role.unique():
        role_rows = lang_data[lang_data.role == role]
        label = str(role)

        fig.add_trace(
            go.Bar(
                name=label,
                x=role_rows.animacy,
                y=role_rows.cnt,
                marker=dict(color=colors[label]),
                legendgroup=label,
                showlegend=label not in roles_in_legend,
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )
        roles_in_legend.add(label)

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The amount of S argument classified as A and O with animacy distinction (trained on Basque, Original sample)"
)

# pio.write_image(fig, 'Figures/The amount of S argument classified as A and O with animacy distinction (trained on Basque, Original sample).png',scale=6, width=1000, height=2500)

fig.show()


### Balanced Sample

The dictionary where the value is the name of the testing language and the key is the path to the results for this pair with Basque (train language).

In [None]:
# Result files of the balanced sample (model trained on Basque), keyed by path.
# Every path has the same shape, so only the language folder and the name of
# the testing treebank file are spelled out.
basqueDataBalanced = {
    f"/content/BalancedLanguageSampling/results/balanced sample/{name}/eu_bdt-ud_{treebank}.conllu_0": name
    for name, treebank in (
        ("Afrikaans", "af_afribooms-ud-train"),
        ("Korean", "ko_kaist-ud-train"),
        ("Basque", "eu_bdt-ud-test"),
        ("Portuguese", "pt_cintil-ud-train"),
        ("Indonesian", "id_gsd-ud-train"),
        ("Armenian", "hy_armtdp-ud-train"),
        ("Russian", "ru_syntagrus-ud-train"),
        ("Greek", "el_gdt-ud-train"),
        ("Chinese", "zh_gsd-ud-train"),
        ("Latin", "la_ittb-ud-train"),
        ("Arabic", "ar_nyuad-ud-train"),
        ("Vietnamese", "vi_vtb-ud-train"),
        ("Hungarian", "hu_szeged-ud-train"),
        ("Georgian", "ka_glc-ud-train"),
        ("Finnish", "fi_tdt-ud-train"),
        ("Turkish", "tr_penn-ud-train"),
        ("Hindi", "hi_hdtb-ud-train"),
        ("Japanese", "ja_gsd-ud-train"),
        ("Persian", "fa_perdt-ud-train"),
        ("Javanese", "jv_csui-ud-test"),
        ("Tamil", "ta_ttb-ud-train"),
    )
}


Here we calculate the accuracy in A-O classification per layer for each pair.

In [None]:
# Per-layer accuracy for every testing language of the balanced sample.
# The keys of basqueDataBalanced are result-file paths, the values are language names.
accuracyBasqBalanced = {
    name: list(layerAccuracy(results_path).values())
    for results_path, name in basqueDataBalanced.items()
}

# One column per testing language, one row per layer.
accuracyBasqBalanced_df = pd.DataFrame(accuracyBasqBalanced)

And visualize the results.

In [None]:
# Draw one accuracy-per-layer curve per testing language on a two-column grid.
n_cols = 2
# Ceiling division: just enough rows to give every language its own subplot.
n_rows = (len(basqueDataBalanced) + n_cols - 1) // n_cols

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(basqueDataBalanced.values()))

layers = list(range(13))

for position, name in enumerate(basqueDataBalanced.values()):
    # Fill the grid row by row: position 0 -> (1, 1), 1 -> (1, 2), 2 -> (2, 1), ...
    grid_row, grid_col = divmod(position, n_cols)

    fig.add_trace(
        go.Scatter(x=layers, y=accuracyBasqBalanced_df[name], showlegend=False),
        row=grid_row + 1,
        col=grid_col + 1,
    )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    template="ggplot2",
    title_text="Accuracy for A-O classification per layer (trained on Basque, Balanced sample)"
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Accuracy")

# pio.write_image(fig, 'Figures/Accuracy for A-O classification per layer (trained on Basque, Balanced sample).png',scale=6, width=800, height=2000)

fig.show()


In [None]:
# Overlay the accuracy curves of all testing languages of the balanced sample
# in a single scatter plot.
fig = px.scatter(
    accuracyBasqBalanced_df,
    template="ggplot2",
)

fig.update_layout(
    title="Accuracy for A-O classification per layer (trained on Basque, overall, Balanced sample)",
    legend_title_text="Language",
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
)

fig.update_yaxes(title_text="Accuracy")
fig.update_xaxes(title_text="Layers")

# pio.write_image(fig, 'Figures/Accuracy for A-O classification per layer (trained on Basque, overall, Balanced sample).png',scale=6, width=1000, height=700)

fig.show()

The highest accuracy for each testing language and the layer on which it was achieved.

In [None]:
# For every testing language: the best accuracy and the position of the layer
# (row of accuracyBasqBalanced_df) on which it was reached.
tested_languages = accuracyBasqBalanced_df.columns

highestAccuracyBasqB = {
    "Value": [accuracyBasqBalanced_df[name].max() for name in tested_languages],
    "Layer": [int(accuracyBasqBalanced_df[name].argmax()) for name in tested_languages],
}

highestAccuracyBasqBdf = pd.DataFrame(highestAccuracyBasqB, index=tested_languages)
highestAccuracyBasqBdf

The mean accuracy for each language.

In [None]:
# Column-wise mean: one value per testing language, averaged over all layers.
accuracyBasqBalanced_df.mean(axis=0)

Here we calculate how often the S argument was classified as A and how often as O on each layer.

In [None]:
# For every testing language, plot per layer how the S arguments were split
# between the A and the O class (two-column grid, one subplot per language).
n_cols = 2
# Ceiling division: just enough rows to give every language its own subplot.
n_rows = (len(basqueDataBalanced) + n_cols - 1) // n_cols

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(basqueDataBalanced.values()))

role_colours = {"A": "green", "O": "goldenrod"}
layers = list(range(13))

# The keys of basqueDataBalanced are the result-file paths.
for position, results_path in enumerate(basqueDataBalanced):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(results_path, "S"))
    grid_row, grid_col = divmod(position, n_cols)

    for role, colour in role_colours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layers,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                # Only the first subplot contributes legend entries, so that the
                # legend explains the colours exactly once per role.
                showlegend=position == 0,
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S argument classified as A and O (trained on Basque, Balanced sample)"
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# pio.write_image(fig, 'Figures/The proportion of S argument classified as A and O (trained on Basque, Balanced sample).png',scale=6, width=800, height=2000)

fig.show()


Here we calculate the amount of S arguments predicted as A or O, broken down by the animacy of S.

In [None]:
# For every testing language, draw bars with the number of S arguments
# classified as A and as O for each animacy value.
n_cols = 2
# Ceiling division: just enough rows to give every language its own subplot.
n_rows = (len(basqueDataBalanced) + n_cols - 1) // n_cols

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=list(basqueDataBalanced.values()))

colors = {"A": "green", "O": "goldenrod"}
# Roles that already own a legend entry; each role is listed only once.
roles_in_legend = set()

# The keys of basqueDataBalanced are the result-file paths.
for position, results_path in enumerate(basqueDataBalanced):
    lang_data = pd.DataFrame(roleClassProportionAnimacy(results_path, "S"))
    grid_row, grid_col = divmod(position, n_cols)

    for role in lang_data.role.unique():
        role_rows = lang_data[lang_data.role == role]
        label = str(role)

        fig.add_trace(
            go.Bar(
                name=label,
                x=role_rows.animacy,
                y=role_rows.cnt,
                marker=dict(color=colors[label]),
                legendgroup=label,
                showlegend=label not in roles_in_legend,
            ),
            row=grid_row + 1,
            col=grid_col + 1,
        )
        roles_in_legend.add(label)

fig.update_layout(
    height=2000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The amount of S argument classified as A and O with animacy distinction (trained on Basque, Balanced sample)"
)

# pio.write_image(fig, 'Figures/The amount of S argument classified as A and O with animacy distinction (trained on Basque, Balanced sample).png',scale=6, width=800, height=2000)

fig.show()


### Comparing two samples

In this section we compare the mean accuracy for each layer in two samples.

In [None]:
# Average over the testing languages (columns), so that every row holds the
# mean accuracy of one layer; one column per sample.
meanAccuracyComp = pd.DataFrame(
    {
        "Original sample": accuracyBasqOrig_df.mean(axis=1),
        "Balanced sample": accuracyBasqBalanced_df.mean(axis=1),
    }
)

In [None]:
# Box plots of the per-layer mean accuracy, one box per sample.
fig = go.Figure(
    data=[
        go.Box(
            y=meanAccuracyComp["Original sample"],
            name="Original sample",
            marker_color="orangered",
        ),
        go.Box(
            y=meanAccuracyComp["Balanced sample"],
            name="Balanced sample",
            marker_color="olive",
        ),
    ]
)

fig.update_layout(
    title="Boxplot for mean accuracy per layer (Original vs. Balanced Sample)",
    showlegend=False,
    legend_title_text="Language",
    template="ggplot2",
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
)

fig.update_yaxes(title_text="Accuracy")

# pio.write_image(fig, 'Figures/Boxplot for mean accuracy per layer (Original vs. Balanced Sample).png',scale=6, width=1000, height=500)

fig.show()

In [None]:
# Scatter plot of the mean accuracy per layer, one series per sample.
fig = px.scatter(
    meanAccuracyComp,
    template="ggplot2",
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Mean accuracy")

fig.update_layout(
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    # The legend entries are the two samples (the columns of meanAccuracyComp),
    # not languages.
    legend_title_text="Sample",
    title="Mean accuracy per layer (Original vs. Balanced Sample)"
)

# pio.write_image(fig, 'Figures/Mean accuracy per layer (Original vs. Balanced Sample).png',scale=6, width=1000, height=500)

fig.show()


Here we use the Mann-Whitney U-test to examine whether the distribution of the accuracy in the original sample differs from the balanced sample.

In [None]:
# Mann-Whitney U test on the per-layer mean accuracies of the two samples.
# NOTE(review): the two columns are paired by layer, while this test treats
# its two inputs as independent samples — confirm that this is the intended test.
mannwhitneyu(
    x=meanAccuracyComp["Original sample"],
    y=meanAccuracyComp["Balanced sample"],
)

## Experiment №3

In this section we provide visualization of the results that we got during the experiment №3 on the extra sample.

For more details, see Section 3.5 of the term paper.

### Extra Sample

The dictionary where the key is the language and the value is the path to the results for this language.

In [None]:
# Result files of the extra sample, keyed by language name. Every path has the
# same shape, so only the language folder and the treebank code are spelled out.
extraDir = {
    name: f"/content/BalancedLanguageSampling/results/extra subsample/{name}/{treebank}-ud-test.conllu_0"
    for name, treebank in (
        ("Naija", "pcm_nsc"),
        ("North Saami", "sme_giella"),
        ("Sanskrit", "sa_vedic"),
    )
}

Here we create a dataframe with the number of occurrences of each role in each language.

In [None]:
# Count the S, A and O roles in the results of every extra-sample language.
roleStatsExtra = {
    language: roleCounter(results_path, ["S", "A", "O"])
    for language, results_path in extraDir.items()
}

# One column per language, one row per role.
roleStatsExtra_df = pd.DataFrame(roleStatsExtra)
roleStatsExtra_df

Here we calculate the accuracy in A-O classification per layer for each language.

In [None]:
# Per-layer accuracy for every language of the extra sample.
accuracyExtra = {
    language: list(layerAccuracy(results_path).values())
    for language, results_path in extraDir.items()
}

# One column per language, one row per layer.
accuracyExtra_df = pd.DataFrame(accuracyExtra)

And visualize the results.

In [None]:
# One accuracy-per-layer curve per extra-sample language, stacked vertically.
fig = make_subplots(rows=3, cols=1, subplot_titles=list(extraDir.keys()))

layers = list(range(13))

# Subplot rows are 1-based.
for grid_row, name in enumerate(extraDir, start=1):
    curve = go.Scatter(x=layers, y=accuracyExtra_df[name], showlegend=False)
    fig.add_trace(curve, row=grid_row, col=1)

fig.update_layout(
    title_text="Accuracy for A-O classification per layer (Extra Sample)",
    template="ggplot2",
    legend_title_text="Language",
    height=1000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
)

fig.update_yaxes(title_text="Accuracy")
fig.update_xaxes(title_text="Layers")

# pio.write_image(fig, 'Figures/Accuracy for A-O classification per layer (Extra Sample).png',scale=6, width=800, height=1000)

fig.show()


In [None]:
# Overlay the accuracy curves of all extra-sample languages in a single scatter plot.
fig = px.scatter(accuracyExtra_df, template="ggplot2")

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Accuracy")

fig.update_layout(
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text='Language',
    # The closing parenthesis was missing from the title.
    title = "Accuracy for A-O classification per layer (overall, Extra Sample)"
)

# pio.write_image(fig, "Figures/Accuracy for A-O classification per layer (overall, Extra Sample).png",scale=6, width=1000, height=700)

fig.show()

The highest accuracy for each language and the layer on which it was achieved.

In [None]:
# For every extra-sample language: the best accuracy and the position of the
# layer (row of accuracyExtra_df) on which it was reached.
tested_languages = accuracyExtra_df.columns

highestAccuracyExtra = {
    "Value": [accuracyExtra_df[name].max() for name in tested_languages],
    "Layer": [int(accuracyExtra_df[name].argmax()) for name in tested_languages],
}

highestAccuracyExtradf = pd.DataFrame(highestAccuracyExtra, index=tested_languages)
highestAccuracyExtradf

The mean accuracy for each language.

In [None]:
# Column-wise mean: one value per language, averaged over all layers.
accuracyExtra_df.mean(axis=0)

Here we calculate how often the S argument was classified as A and how often as O on each layer.

In [None]:
# For every extra-sample language, plot per layer how the S arguments were
# split between the A and the O class (one subplot per language).
fig = make_subplots(rows=len(extraDir), cols=1, subplot_titles=list(extraDir.keys()))

role_colours = {"A": "green", "O": "goldenrod"}
layers = list(range(13))

# Subplot rows are 1-based.
for grid_row, results_path in enumerate(extraDir.values(), start=1):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(results_path, "S"))

    for role, colour in role_colours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layers,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                # Only the first subplot contributes legend entries, so that the
                # legend explains the colours exactly once per role.
                showlegend=grid_row == 1,
            ),
            row=grid_row,
            col=1,
        )

fig.update_layout(
    height=1000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S argument classified as A and O (Extra sample)"
)

fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# pio.write_image(fig, "Figures/The proportion of S argument classified as A and O (Extra sample).png",scale=6, width=800, height=1000)

fig.show()


Here we calculate the amount of S arguments predicted as A or O, broken down by the animacy of S.

In [None]:
# For every extra-sample language, draw bars with the number of S arguments
# classified as A and as O for each animacy value.
fig = make_subplots(rows=len(extraDir), cols=1, subplot_titles=list(extraDir.keys()))

colors = {"A": "green", "O": "goldenrod"}
# Roles that already own a legend entry; each role is listed only once.
roles_in_legend = set()

# Subplot rows are 1-based.
for grid_row, results_path in enumerate(extraDir.values(), start=1):
    lang_data = pd.DataFrame(roleClassProportionAnimacy(results_path, "S"))

    for role in lang_data.role.unique():
        role_rows = lang_data[lang_data.role == role]
        label = str(role)

        fig.add_trace(
            go.Bar(
                name=label,
                x=role_rows.animacy,
                y=role_rows.cnt,
                marker=dict(color=colors[label]),
                legendgroup=label,
                showlegend=label not in roles_in_legend,
            ),
            row=grid_row,
            col=1,
        )
        roles_in_legend.add(label)

fig.update_layout(
    height=1000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The amount of S classified as A and O with animacy distinction (Extra sample)"
)

# pio.write_image(fig, "Figures/The amount of S classified as A and O with animacy distinction (Extra sample).png",scale=6, width=800, height=1000)

fig.show()


Here we calculate how often the S-passive argument was classified as A and how often as O on each layer.

In [None]:
# For every extra-sample language, plot per layer how the S-passive arguments
# were split between the A and the O class (one subplot per language).
fig = make_subplots(rows=len(extraDir), cols=1, subplot_titles=list(extraDir.keys()))

role_colours = {"A": "green", "O": "goldenrod"}
layers = list(range(13))

# Subplot rows are 1-based.
for grid_row, results_path in enumerate(extraDir.values(), start=1):
    lang_data = pd.DataFrame.from_dict(roleClassProportion(results_path, "S-passive"))

    for role, colour in role_colours.items():
        fig.add_trace(
            go.Scatter(
                name=role,
                x=layers,
                y=lang_data[role],
                marker=dict(color=colour),
                legendgroup=role,
                # Only the first subplot contributes legend entries, so that the
                # legend explains the colours exactly once per role.
                showlegend=grid_row == 1,
            ),
            row=grid_row,
            col=1,
        )

fig.update_layout(
    height=1000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The proportion of S-passive classified as A and O (Extra sample)"
)
fig.update_xaxes(title_text="Layers")
fig.update_yaxes(title_text="Proportion")

# pio.write_image(fig, "Figures/The proportion of S-passive classified as A and O (Extra sample).png",scale=6, width=800, height=1000)

fig.show()


## Discussion: Analyzing the word order

In this subsection we try to analyze the probable correlation between the predictions of the multilingual model and the usual word order of S, A and O arguments throughout all the languages from three samples.

First, we need to upload the [UD data (v. 2.15)](https://universaldependencies.org/) to Google Drive.

In [None]:
# Mount Google Drive under /content/drive/ so that the files stored there can
# be read from this notebook (google.colab is available only inside Colab).
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
def distanceCount(path: str) -> tuple[list[int], list[int], list[int]]:
    """
    Count the signed distances between S, A, O-roles and their heads.

    Every token tagged ``VERB`` is treated as a head. Its ``NOUN``/``PROPN``
    dependents count as subjects when their relation contains ``"nsubj"`` and
    as objects when it contains ``"obj"``. Subjects of a verb that has at
    least one such object are A-roles, subjects of a verb without one are
    S-roles, and the objects themselves are O-roles. A distance is the id of
    the dependent minus the id of its head, so it is negative when the
    dependent precedes the verb.

    Arguments:
        path : str
            The path to the conllu-file.

    Return:
        tuple[list[int], list[int], list[int]]:
            Three lists (for S, A and O, in this order) with the distance of
            each matching noun that was found in the conllu-file.

    """
    nominal_tags = ("NOUN", "PROPN")
    s_roles = []
    a_roles = []
    o_roles = []

    for sentence in pyconll.load_from_file(path):
        # Group the nominal dependents by the id of their head in one pass,
        # instead of rescanning the whole sentence for every verb.
        subjects_by_head = {}
        objects_by_head = {}

        for token in sentence:
            # Tokens without a head or a relation cannot be an argument of a verb.
            if token.head is None or token.deprel is None:
                continue
            if token.upos not in nominal_tags:
                continue

            # NOTE(review): these are substring checks, so "obj" also accepts
            # "iobj" and "nsubj" also accepts subtypes such as "nsubj:pass" —
            # confirm that this is intended.
            if "nsubj" in token.deprel:
                subjects_by_head.setdefault(token.head, []).append(int(token.id))
            if "obj" in token.deprel:
                objects_by_head.setdefault(token.head, []).append(int(token.id))

        for token in sentence:
            if token.upos != "VERB":
                continue

            subject_ids = subjects_by_head.get(token.id, [])
            object_ids = objects_by_head.get(token.id, [])
            if not subject_ids and not object_ids:
                continue

            verb_position = int(token.id)
            # A subject is an A-role only when its verb also has an object.
            subject_roles = a_roles if object_ids else s_roles
            subject_roles.extend(id_ - verb_position for id_ in subject_ids)
            o_roles.extend(id_ - verb_position for id_ in object_ids)

    return s_roles, a_roles, o_roles


In [None]:
# Training treebank (UD v2.15, stored on Google Drive) for every language.
# NOTE(review): Javanese and Tamil are part of the balanced sample but have no
# entry here — confirm whether that is intentional.
pathConllu = {
    "Basque": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Basque-BDT/eu_bdt-ud-train.conllu",
    # Previously pointed at the Czech treebank (copy-paste error).
    "Chinese": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Chinese-GSD/zh_gsd-ud-train.conllu",
    "Croatian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Croatian-SET/hr_set-ud-train.conllu",
    "Czech": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Czech-PDT/cs_pdt-ud-train.conllu",
    "English": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_English-EWT/en_ewt-ud-train.conllu",
    # Previously pointed at the English treebank (copy-paste error).
    "Estonian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Estonian-EDT/et_edt-ud-train.conllu",
    "Finnish": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Finnish-TDT/fi_tdt-ud-train.conllu",
    "French": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_French-GSD/fr_gsd-ud-train.conllu",
    "German": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_German-GSD/de_gsd-ud-train.conllu",
    "Hebrew": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Hebrew-HTB/he_htb-ud-train.conllu",
    "Hindi": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Hindi-HDTB/hi_hdtb-ud-train.conllu",
    "Indonesian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Indonesian-GSD/id_gsd-ud-train.conllu",
    "Latin": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Latin-ITTB/la_ittb-ud-train.conllu",
    "Norwegian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Norwegian-Bokmaal/no_bokmaal-ud-train.conllu",
    "Persian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Persian-PerDT/fa_perdt-ud-train.conllu",
    "Polish": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Polish-PDB/pl_pdb-ud-train.conllu",
    "Russian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Russian-SynTagRus/ru_syntagrus-ud-train.conllu",
    "Serbian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Serbian-SET/sr_set-ud-train.conllu",
    "Slovak": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Slovak-SNK/sk_snk-ud-train.conllu",
    "Slovenian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Slovenian-SSJ/sl_ssj-ud-train.conllu",
    "Spanish": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Spanish-AnCora/es_ancora-ud-train.conllu",
    "Urdu": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Urdu-UDTB/ur_udtb-ud-train.conllu",
    "Afrikaans": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Afrikaans-AfriBooms/af_afribooms-ud-train.conllu",
    "Arabic": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Arabic-NYUAD/ar_nyuad-ud-train.conllu",
    "Armenian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Armenian-ArmTDP/hy_armtdp-ud-train.conllu",
    "Georgian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Georgian-GLC/ka_glc-ud-train.conllu",
    "Greek": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Greek-GDT/el_gdt-ud-train.conllu",
    "Hungarian": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Hungarian-Szeged/hu_szeged-ud-train.conllu",
    "Japanese": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Japanese-GSD/ja_gsd-ud-train.conllu",
    "Korean": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Korean-Kaist/ko_kaist-ud-train.conllu",
    "Portuguese": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Portuguese-CINTIL/pt_cintil-ud-train.conllu",
    "Turkish": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Turkish-Penn/tr_penn-ud-train.conllu",
    "Vietnamese": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Vietnamese-VTB/vi_vtb-ud-train.conllu",
    "Naija": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Naija-NSC/pcm_nsc-ud-train.conllu",
    "North Saami": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_North_Sami-Giella/sme_giella-ud-train.conllu",
    "Sanskrit": "/content/drive/MyDrive/ud-treebanks-v2.15/UD_Sanskrit-Vedic/sa_vedic-ud-train.conllu"
}


In [None]:
# One row of subplots per language and one column per role; every subplot
# is a bar chart of how often each role-to-head distance occurs.
role_labels = ("S", "A", "O")

titles = [
    f"{language}: {role}-role"
    for language in pathConllu
    for role in role_labels
]

# The grid grows with pathConllu instead of relying on a fixed number of rows.
fig = make_subplots(rows=len(pathConllu), cols=len(role_labels), subplot_titles=titles)

# Subplot rows and columns are 1-based.
for grid_row, treebank_path in enumerate(pathConllu.values(), start=1):
    # distanceCount returns the lists in S, A, O order, matching role_labels.
    for grid_col, distances in enumerate(distanceCount(treebank_path), start=1):
        # Sorted distinct distances together with their frequencies.
        values, counts = np.unique(distances, return_counts=True)

        fig.add_trace(
            go.Bar(x=values, y=counts, showlegend=False),
            row=grid_row,
            col=grid_col,
        )

fig.update_layout(
    height=6000,
    width=800,
    font_family="Brill",
    font_color="black",
    title_font_family="Brill",
    legend_title_text="Role",
    template="ggplot2",
    title_text="The distances between S, A and O-roles and their heads"
)

# pio.write_image(fig, "Figures/The distances between S, A and O-roles and their heads.png",scale=6, width=800, height=6000)

fig.show()
