# Curriculum learning plots: before and after

In [13]:
import plotly.graph_objects as go


def plot_curriculum_results(datasets, initial_scores, final_scores):
    """Print average scores and show a grouped bar chart of per-dataset scores.

    The chart has one group of bars per dataset: an 'Initial' bar and a
    'Final' bar, labelled as BLEU scores before and after fine-tuning.

    Args:
        datasets: Sequence of dataset names, used as the x-axis categories.
        initial_scores: Sequence with one score per dataset ('Initial' bars).
        final_scores: Sequence with one score per dataset ('Final' bars).

    Raises:
        ValueError: If ``datasets`` is empty, or if the three sequences do
            not all have the same length.

    Returns:
        None. The averages are printed and the figure is displayed with
        ``fig.show()``.
    """
    # Validate up front: the score lists are parallel to `datasets`, so a
    # missing entry would silently skew the averages and misalign the bars,
    # and an empty list would otherwise surface as a ZeroDivisionError.
    n_datasets = len(datasets)
    if n_datasets == 0:
        raise ValueError("at least one dataset is required")
    if not (len(initial_scores) == len(final_scores) == n_datasets):
        raise ValueError(
            "datasets, initial_scores and final_scores must have the same "
            f"length (got {n_datasets}, {len(initial_scores)}, "
            f"{len(final_scores)})"
        )

    average_initial_score = sum(initial_scores) / len(initial_scores)
    average_final_score = sum(final_scores) / len(final_scores)
    print(f"Average initial score: {average_initial_score}")
    print(f"Average final score: {average_final_score}")

    # (legend name, y values, bar colour) for each series, in legend order.
    series = [
        ('Initial', initial_scores, 'rgb(31, 119, 180)'),
        ('Final', final_scores, 'rgb(255, 127, 14)'),
    ]
    traces = [
        go.Bar(x=datasets, y=scores, name=name, marker=dict(color=color))
        for name, scores, color in series
    ]

    layout = go.Layout(
        title='BLEU Scores Before and After Fine-tuning',
        xaxis=dict(title='Dataset'),
        yaxis=dict(title='BLEU Score'),
        barmode='group'  # side-by-side bars per dataset rather than stacked
    )

    fig = go.Figure(data=traces, layout=layout)
    fig.show()

### Ordered by target vocabulary size

#### Experiment A

In [14]:
# Dataset names for the "ordered by target vocabulary size" experiments;
# the later experiment cells pass this same list to the plotting helper.
vocabulary_datasets = [
    "khresmoi-tr",
    "orphanet-terms",
    "clinspen-tr",
    "medline",
    "preferred-en2es",
    "snomed",
    "orphanet-definitions-tr",
    "pubmed-tr",
]

# Experiment A scores, one per dataset, in the same order as the list above.
vocabulary_initial = [
    45.8, 44.1, 40.3, 53.7,
    30.5, 25.4, 55.8, 43.8,
]
vocabulary_final = [
    49.6, 50.5, 41.0, 54.5,
    32.3, 26.4, 58.1, 44.1,
]

plot_curriculum_results(
    datasets=vocabulary_datasets,
    initial_scores=vocabulary_initial,
    final_scores=vocabulary_final,
)

Average initial score: 42.425
Average final score: 44.5625


#### Experiment B: higher LRs

In [15]:
# Experiment B scores, one per entry of `vocabulary_datasets`.
# Note: these assignments rebind the `vocabulary_initial` / `vocabulary_final`
# names that the Experiment A cell also assigns.
vocabulary_initial = [
    47.5, 39.2, 38.7, 52.3,
    29.8, 24.9, 44.1, 43.6,
]
vocabulary_final = [
    49.6, 50.5, 42.7, 55.0,
    34.1, 28.8, 59.2, 43.7,
]

plot_curriculum_results(
    datasets=vocabulary_datasets,
    initial_scores=vocabulary_initial,
    final_scores=vocabulary_final,
)

Average initial score: 40.0125
Average final score: 45.449999999999996


#### Experiment C: marginally higher LRs

In [16]:
# Experiment C scores, one per entry of `vocabulary_datasets`.
initial = [
    47.5, 39.2, 38.7, 52.1,
    30.0, 25.0, 44.2, 43.6,
]
final = [
    49.6, 50.4, 42.9, 55.1,
    34.4, 28.9, 59.0, 43.9,
]

plot_curriculum_results(
    datasets=vocabulary_datasets,
    initial_scores=initial,
    final_scores=final,
)

Average initial score: 40.0375
Average final score: 45.525
