In [None]:
import json
import os

import torch

from scripts import prepare_mt5_finetuning, plot_loss, evaluate_model, save_model, save_tokenizer

Configure the mT5 fine-tuning here. Each dictionary in `runs` describes one training run; add more dictionaries to the list to execute several training runs back to back.

In [None]:
# Hugging Face access token, used for hub download and upload.
hf_token = None
# Hugging Face username.
hub_name = None

# Paths to the train, validation and test files; set all three before training.
file_train = ""
file_val = ""
file_test = ""

# One entry per training run. The training and evaluation cells iterate over
# this list in order and read the keys defined here.
runs = [
    dict(
        model_id="google/mT5-small",
        finetune_id="mT5-small-test",
        file_train=file_train,
        file_val=file_val,
        tokenizer_id="google/mT5-small",
        trim_model=False,
        file_test=file_test,
        lr=1e-3,
        epochs=1,
    ),
]

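For each run, this cell sets up the model for training and starts the training process by calling `trainer.train()`. Afterwards it saves the model (and the tokenizer, if `trim_model` is set) and writes the run's configuration to `<finetune_id>/<finetune_id>_run_stats.json`.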

In [None]:

# Train every configured run in sequence, saving its artifacts before moving on.
for run in runs:
    # Build the trainer and tokenizer from this run's configuration entry.
    trainer, tokenizer = prepare_mt5_finetuning(
        model_id=run["model_id"],
        tokenizer_id=run["tokenizer_id"],
        trim_model=run["trim_model"],
        finetuned_model_id=run["finetune_id"],
        file_train=run["file_train"],
        file_val=run["file_val"],
        learning_rate=run["lr"],
        num_train_epochs=run["epochs"],
        hf_token=hf_token,
        push_to_hub=False,
        # unfreeze_embedding=True,
    )

    trainer.train()

    save_model(trainer, run["finetune_id"], hf_token=hf_token, hub_name=hub_name)
    # The tokenizer is only saved for runs that trim the model.
    if run["trim_model"]:
        save_tokenizer(tokenizer, run["finetune_id"], hf_token=hf_token, hub_name=hub_name)

    # Store the run configuration next to the model artifacts.
    stats_path = f'{run["finetune_id"]}/{run["finetune_id"]}_run_stats.json'
    # NOTE(review): it is not visible here whether save_model creates this
    # directory locally, so create it explicitly; otherwise open() would raise
    # FileNotFoundError after the (expensive) training has already finished.
    os.makedirs(os.path.dirname(stats_path), exist_ok=True)
    # The context manager closes the file; no explicit close() is needed.
    with open(stats_path, "w", encoding="utf-8") as f:
        json.dump(run, f)

    # Hand unused cached GPU memory back before the next run starts. Memory
    # still referenced through `trainer` is not affected by this call.
    torch.cuda.empty_cache()
    

Starts the evaluation process. By default, this generates predictions for the whole test set; the number of predictions can be limited with the `n` argument of `evaluate_model`.

In [None]:
# Evaluate each configured run in turn.
for run in runs:
    # Echo the configuration so the output shows which run is being evaluated.
    print(run)
    evaluate_model(
        # Model and tokenizer identifiers
        base_model_id=run["model_id"],
        peft_model_id=run["finetune_id"],
        tokenizer_id=run["tokenizer_id"],
        resize_embedding=run["trim_model"],
        # Test data and output location
        file_test=run["file_test"],
        save_dir=run["finetune_id"],
        # n=50,
        # Hugging Face hub access
        hf_token=hf_token,
        hub_name=hub_name,
    )
    torch.cuda.empty_cache()