In [None]:
# Shell out to `nvidia-smi` so the cell output records which NVIDIA GPUs
# (if any) are visible to this runtime before training starts.
!nvidia-smi

In [None]:
# Load the TensorBoard IPython extension into this kernel session.
%load_ext tensorboard

In [None]:
import pandas as pd

import torch
import transformers
import torchmetrics
import pytorch_lightning as pl

from nli_dataset import NLIDataModule,NLIDataset
from nli_finetune import NLIFineTuningModel
from error_analysis import get_error_samples
from mnli import mnli_df

from config import CONFIG

In [None]:
# Seed the run from CONFIG['SEED'] for reproducibility. The return value is
# bound to `_` so it is not echoed as the cell's output.
_=pl.seed_everything(CONFIG['SEED'])

## Training

In [None]:
# Checkpoint callback: track `val_loss` (lower is better) and keep only the
# single best checkpoint, written under CONFIG['CHECKPOINT_DIR'].
model_save_checkpoint = pl.callbacks.ModelCheckpoint(
    dirpath=CONFIG['CHECKPOINT_DIR'],
    # Doubled braces stay literal in the f-string, so the resulting template
    # is "<MODEL_SAVE_NAME>-{epoch:02d}-{val_loss:.2f}".
    filename=f"{CONFIG['MODEL_SAVE_NAME']}-{{epoch:02d}}-{{val_loss:.2f}}",
    monitor='val_loss',
    mode='min',
    save_top_k=1,
)

In [None]:
# Trainer setup: one process per CUDA device that torch can see, with the
# epoch budget and numeric precision taken from CONFIG. The pre-fit sanity
# validation pass is switched off (num_sanity_val_steps=0).
trainer = pl.Trainer(
    gpus=torch.cuda.device_count(),
    precision=CONFIG['PRECISION'],
    max_epochs=CONFIG['MAX_EPOCHS'],
    num_sanity_val_steps=0,
    callbacks=[model_save_checkpoint],
)

In [None]:
# Build the fine-tuning module. Optimisation hyper-parameters come from
# CONFIG; epoch count, device count and gradient-accumulation factor are read
# back from the already-constructed trainer so both objects agree on them.
model = NLIFineTuningModel(
    model_name_or_path=CONFIG['MODEL_NAME_OR_PATH'],
    num_labels=CONFIG['NUM_CLASSES'],
    learning_rate=CONFIG['LEARNING_RATE'],
    adam_epsilon=CONFIG['ADAM_EPSILON'],
    weight_decay=CONFIG['WEIGHT_DECAY'],
    max_len=CONFIG['MAX_LEN'],
    warmup_steps=CONFIG['WARMUP_STEPS'],
    max_epochs=trainer.max_epochs,
    gpus=trainer.gpus,
    accumulate_grad_batches=trainer.accumulate_grad_batches,
)

# The data module class imported from `nli_dataset` is `NLIDataModule`; the
# former `NLIDataModel` spelling was never imported and raised a NameError
# right before training could start.
# NOTE(review): `get_split_def` is passed through unchanged — confirm it
# matches the NLIDataModule constructor signature.
mnli_dm = NLIDataModule(get_split_def=mnli_df)
trainer.fit(model, mnli_dm)

In [None]:
# Show the metrics the trainer has logged; as the cell's last bare expression
# the value is rendered as the cell output.
trainer.logged_metrics

In [None]:
# Run the trainer's test loop. No model or datamodule is passed here, so this
# relies on whatever the trainer retained from the `fit` call above.
# NOTE(review): which weights are evaluated (best checkpoint vs. last state)
# depends on the installed Lightning version — confirm.
trainer.test()