# SacreBLEU score versus dataset complexity for the original and fine-tuned models

In [29]:
import plotly.graph_objects as go
import pandas as pd

In [30]:
# Per-dataset statistics. Every list below is positional: entry i describes
# dataset_names[i], so the lists must stay the same length and order.
dataset_names = ['clinspen-te', 'hpo', 'khresmoi-te', 'orphanet-definitions-te', 'pubmed-te']
vocab_sizes = [4893, 10776, 3324, 7796, 5563]
# NOTE(review): presumably the number of sentence pairs per dataset - confirm.
number_pairs = [2519, 17875, 500, 703, 699]

# "Complexity" of a dataset is its vocabulary size divided by its number of
# pairs (the plot labels this ratio "Vocabulary Density").
# The loop variable is deliberately not called `np`, which would read as the
# conventional numpy alias.
complexities = [
    vocab_size / pair_count
    for vocab_size, pair_count in zip(vocab_sizes, number_pairs)
]

# SacreBLEU scores per dataset for the original model and two fine-tuned
# variants.
original_sacrebleu = [39, 47.8, 49.5, 46.3, 48.4]
finetuned_all_sacrebleu = [54.5, 48.7, 47.9, 61.4, 45.9]
finetuned_cl_sacrebleu = [40.7, 49.8, 48.5, 57.7, 48.2]


def build_trace(sacrebleu_scores: list[float], name: str):
    """Build a scatter trace of SacreBLEU score against dataset complexity.

    The scores are paired positionally with the module-level
    ``dataset_names`` and ``complexities`` lists, then ordered by ascending
    complexity so the connecting line runs left to right.

    Args:
        sacrebleu_scores: One score per dataset, in ``dataset_names`` order.
        name: Legend label for the trace.

    Returns:
        A ``go.Scatter`` drawn with markers and lines, carrying the dataset
        name of each point in its ``text`` field.
    """
    columns = {
        'Dataset': dataset_names,
        'Complexity': complexities,
        'SacreBLEU Score': sacrebleu_scores,
    }
    # Sort rows by the x value; otherwise the line would zig-zag between points.
    ordered = pd.DataFrame(columns).sort_values(by='Complexity')

    scatter_options = dict(
        x=ordered['Complexity'],
        y=ordered['SacreBLEU Score'],
        mode='markers+lines',
        name=name,
        text=ordered['Dataset'],
        marker=dict(size=10),
    )
    return go.Scatter(**scatter_options)

# One trace per model variant; all three share the same complexity axis.
trace1 = build_trace(original_sacrebleu, 'Helsinki-NLP')
trace2 = build_trace(finetuned_all_sacrebleu, 'FT All')
trace3 = build_trace(finetuned_cl_sacrebleu, 'FT CL')

# Pass the traces to the constructor in the order they should appear in the legend.
fig = go.Figure(data=[trace1, trace2, trace3])

layout_options = {
    'title': 'Comparison of SacreBLEU Scores vs Vocabulary Density',
    'xaxis_title': 'Vocabulary Density',
    'yaxis_title': 'SacreBLEU Score',
    'legend_title': 'Models',
    'hovermode': 'closest',
}
fig.update_layout(**layout_options)

fig.show()