# Test results graphing

## Mean scores per metric

In [13]:
import pandas as pd
import plotly.graph_objects as go


def plot_mean_bar_chart(df: pd.DataFrame,
                        lowest_score: int,
                        highest_score: int,
                        tick_step: int = 5) -> None:
    """Show a grouped bar chart with one bar series per column of ``df``.

    The index of ``df`` supplies the x-axis categories (labelled "Metric") and
    every column becomes one named, coloured bar series.

    Args:
        df: Frame whose index holds the x-axis categories and whose columns
            hold the series to plot.
        lowest_score: Lower bound of the y-axis and value of the first tick.
        highest_score: Upper bound of the y-axis (inclusive for tick purposes).
        tick_step: Distance between consecutive y-axis ticks. Defaults to 5.
    """
    fig = go.Figure()

    colours = ['navy', 'steelblue', 'blue', 'orangered', 'limegreen']
    # Index into the palette modulo its length instead of zipping against it:
    # zip() stops at the shorter input, which silently dropped every column
    # beyond the fifth from the chart.
    for position, column in enumerate(df.columns):
        colour = colours[position % len(colours)]
        fig.add_trace(go.Bar(x=df.index, y=df[column], name=column, marker=dict(color=colour)))

    # "+ 1" makes highest_score itself eligible as the last tick.
    tick_values = list(range(lowest_score, highest_score + 1, tick_step))

    fig.update_layout(barmode='group',
                      xaxis=dict(title='Metric'),
                      yaxis=dict(title='Mean corpus score', range=[lowest_score, highest_score],
                                 tickvals=tick_values),
                      width=800,
                      legend=dict(x=0, y=1))

    fig.show()

In [14]:
# Corpus-level mean scores, entered one row per model. The frame is then
# transposed so the metrics form the index (x-axis groups) and the models
# form the columns (one bar series each).
# NOTE(review): the backticks in "`TER`" are part of the label string and will
# presumably show up literally on the chart's x-axis — confirm this is intended.
mean_results = (
    pd.DataFrame(
        data=[
            ['FT All', 52.1, 65.3, 94.4],
            ['FT CL', 49.4, 63.6, 94.1],
            ['Helsinki-NLP', 46.6, 61.4, 93.6],
            ['NLLB 3.3B', 44.7, 57.3, 92.8],
            ['MADLAD-400', 49.1, 61.5, 93.5],
        ],
        columns=['Model', 'SacreBLEU', "`TER`", 'Semantic similarity'],
    )
    .set_index('Model')
    .transpose()
)

plot_mean_bar_chart(mean_results, lowest_score=40, highest_score=95)

## SacreBLEU scores

In [15]:
def plot_sacrebleu_bar_chart(df: pd.DataFrame,
                             models: tuple = ('FT All', 'FT CL', 'Helsinki-NLP', 'NLLB 3.3B', 'MADLAD-400'),
                             y_range: tuple = (30, 65)) -> None:
    """Show a grouped bar chart of per-dataset SacreBLEU scores, one series per model.

    Args:
        df: Frame with a 'Dataset' column (x-axis categories) and one score
            column for each name in ``models``.
        models: Column names to plot, in legend order. Defaults to the five
            models this chart was originally hard-coded for.
        y_range: ``(lower, upper)`` bounds of the y-axis. Defaults to (30, 65).

    Raises:
        KeyError: If 'Dataset' or any name in ``models`` is not a column of ``df``.
    """
    colours = ['navy', 'steelblue', 'blue', 'orangered', 'limegreen']

    # One trace per requested model; colours are assigned by position and wrap
    # around the palette so more than five models can still be plotted.
    data = [
        go.Bar(x=df['Dataset'], y=df[model], name=model,
               marker=dict(color=colours[position % len(colours)]))
        for position, model in enumerate(models)
    ]

    layout = go.Layout(title='SacreBLEU Scores for Machine Translation Models',
                       xaxis=dict(title='Dataset'),
                       yaxis=dict(title='SacreBLEU', range=list(y_range)),
                       barmode='group')

    # Create the figure and plot
    fig = go.Figure(data=data, layout=layout)
    fig.show()

In [16]:
# SacreBLEU per test dataset, laid out column-wise: the 'Dataset' entry names
# the rows and each remaining entry holds one model's scores in the same order.
sacrebleu_results = pd.DataFrame({
    'Dataset': ['clinspen-te', 'hpo', 'khresmoi-te', 'orphanet-definitions-te', 'pubmed-te'],
    'FT All': [55.4, 49.2, 47.9, 61.6, 46.3],
    'FT CL': [41.9, 50.2, 49.6, 57.7, 48.5],
    'Helsinki-NLP': [40.2, 48.2, 49.5, 46.4, 48.7],
    'NLLB 3.3B': [36.1, 45.1, 49.3, 45.9, 49.3],
    'MADLAD-400': [38.8, 54.2, 50.1, 50.8, 51.8],
})

plot_sacrebleu_bar_chart(sacrebleu_results)