# Complexity scatter plot for fine-tuned and original models

In [11]:
import plotly.graph_objects as go
import pandas as pd

In [12]:
def _score_trace(frame, label):
    """Build one markers+lines trace of SacreBLEU score against complexity.

    ``frame`` must carry the 'Dataset', 'Complexity' and 'SacreBLEU Score'
    columns; ``label`` is the legend entry for the trace. The dataset name is
    attached as per-point text so it shows up on hover.
    """
    return go.Scatter(
        x=frame['Complexity'],
        y=frame['SacreBLEU Score'],
        mode='markers+lines',
        name=label,
        text=frame['Dataset'],
        marker={'size': 10},
    )


# Per-dataset inputs; every list below is index-aligned with dataset_names.
dataset_names = [
    'clinspen-te',
    'hpo',
    'khresmoi-te',
    'orphanet-definitions-te',
    'pubmed-te',
]
target_lengths = [5.5, 4.9, 20.5, 56.7, 26.9]
vocab_sizes = [4893, 10776, 3324, 7796, 5563]
original_sacrebleu = [39, 47.8, 49.5, 46.3, 48.4]
finetuned_sacrebleu = [54.5, 48.7, 47.9, 61.4, 45.9]

# Complexity is computed here as vocabulary size divided by target length.
complexities = [
    vocab / length for vocab, length in zip(vocab_sizes, target_lengths)
]

# Column data for each model; the two tables differ only in the score column.
original = {
    'Dataset': dataset_names,
    'Complexity': complexities,
    'SacreBLEU Score': original_sacrebleu,
}
finetuned = {**original, 'SacreBLEU Score': finetuned_sacrebleu}

# Sort by complexity so the connecting lines run left to right on the x axis.
original_df = pd.DataFrame(original).sort_values(by='Complexity')
finetuned_df = pd.DataFrame(finetuned).sort_values(by='Complexity')

trace1 = _score_trace(original_df, 'Original')
trace2 = _score_trace(finetuned_df, 'Fine-tuned')

fig = go.Figure()
for _trace in (trace1, trace2):
    fig.add_trace(_trace)

# Layout options are passed in the same keyword order as before; the x axis
# is logarithmic.
_layout_options = {
    'title': 'Comparison of SacreBLEU Scores vs Complexity',
    'xaxis_title': 'Complexity',
    'xaxis': {'type': 'log'},
    'yaxis_title': 'SacreBLEU Score',
    'legend_title': 'SacreBLEU Scores',
    'hovermode': 'closest',
}
fig.update_layout(**_layout_options)

fig.show()