In [1]:
# !pip install scikit-learn

[0m

In [2]:
import os
import time
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import RobertaForSequenceClassification, RobertaTokenizerFast, AdamW
# import datasets
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from matplotlib import pyplot as plt
import torch.backends.cudnn as cudnn
pd.set_option('display.max_rows', None)

In [3]:
ROOT_PATH = os.getcwd()
# DATA_PATH = os.path.join(ROOT_PATH, 'finetune_dataset/kge_sentiment_analysis')
DATA_PATH = os.path.join(ROOT_PATH, '../../research_datasets/finetune_dataset/kge_sentiment_analysis')

# Set device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
MODEL_NAME = 'roberta-large'

In [5]:
# # Load the SST-5 dataset
dataset = pd.read_csv(DATA_PATH + '/train.tsv.zip', sep="\t")
train, valid = train_test_split(dataset, random_state=42, test_size = 0.1)

In [6]:
# Load the tokenizer
tokenizer = RobertaTokenizerFast.from_pretrained(MODEL_NAME)

# Tokenize the dataset
def tokenize_dataset(dataset):
    encodings = tokenizer(dataset['Phrase'].tolist(), truncation=True, padding=True)
    labels = dataset['Sentiment'].tolist()
    return encodings, labels

train_encodings, train_labels = tokenize_dataset(train)
valid_encodings, valid_labels = tokenize_dataset(valid)

In [7]:
def get_encodings_df(encodings, idx):
    ids, tokens, attention_mask = [], [], []
    i = 0

    for get_ids in encodings[idx].ids:
        ids += [get_ids]
        
    for get_tokens in encodings[idx].tokens:
        tokens += [get_tokens.replace('Ġ', '')]

    for get_attention_mask in encodings[idx].attention_mask:
        attention_mask += [get_attention_mask]
        i = i+1

    show_tokens_DF = pd.DataFrame({'ids': ids, 'tokens': tokens, 'attention_mask': attention_mask})
    print('got ' + str(i))
    return show_tokens_DF

In [8]:
# show_tokens_DF = get_encodings_df(test_encodings, 99)
# show_tokens_DF

In [9]:
def get_encodings_df(encodings, idx):
    input_ids, attention_mask = [], []
    i = 0

    for get_input_ids in encodings['input_ids'][idx]:
        input_ids += [get_input_ids]

    for get_attention_mask in encodings['attention_mask'][idx]:
        attention_mask += [get_attention_mask]
        i = i+1

    show_tokens_DF = pd.DataFrame({'input_ids': input_ids, 'attention_mask': attention_mask})
    print('got ' + str(i))
    return show_tokens_DF

In [10]:
# show_tokens_DF = get_encodings_df(test_encodings, 99)
# show_tokens_DF

In [11]:
# Create a torch dataset
class TsvDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item = {}
        item['input_ids'] = torch.tensor(self.encodings['input_ids'][idx])
        item['attention_mask'] = torch.tensor(self.encodings['attention_mask'][idx])
        item['labels'] = torch.tensor(self.labels[idx])

        return item

    def __len__(self):
        return len(self.labels)

In [12]:
train_dataset = TsvDataset(train_encodings, train_labels)
valid_dataset = TsvDataset(valid_encodings, valid_labels)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True, generator=torch.Generator(device='cuda'))
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=True, drop_last=True, generator=torch.Generator(device='cuda'))

In [13]:
model = RobertaForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=5)

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.out_proj.weight', 'clas

In [14]:
class Trainer(object):
    '''This class takes care of training and validation of our model'''
    def __init__(self, model, train_loader, valid_loader, device=torch.device("cuda:0")):
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
        cudnn.benchmark = True
        self.best_loss = float("inf")
        self.phases = ["train", "valid"]

        self.device = device
        self.dataloaders = {self.phases[0]: train_loader, self.phases[1]: valid_loader}
    
        self.lr = 5e-6
        self.num_epochs = 400
        # self.accumulation_steps = 32 // self.dataloaders['train'].batch_size
        
        self.model = model.to(self.device)
        self.optimizer = AdamW(model.parameters(), lr=1e-5)
        # self.scheduler = ReduceLROnPlateau(self.optimizer, mode="min", patience=3, verbose=True)
        
        self.losses =   {phase: [] for phase in self.phases}
        self.accuracy = {phase: [] for phase in self.phases}

    def iterate(self, epoch, phase):
        running_losses = []
        num_correct = 0
        num_samples = 0
        nowtime = time.strftime("%H:%M:%S")
        print(f"[{nowtime}] type: {phase} epoch: {epoch}")

        if phase == "train":
            self.model.train()
        else:
            self.model.eval()
        dataloader = self.dataloaders[phase]
        # total_batches = len(dataloader)

        self.optimizer.zero_grad()
        pbar = tqdm(dataloader, total=len(dataloader), position=0, leave=True, desc=f"Epoch {epoch}")
        for itr, batch in enumerate(pbar):
            batch = {key: value.to(self.device) for key, value in batch.items()}
            outputs = self.model(**batch)
            loss = outputs.loss

            if phase == "train":
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

            # loss = loss / self.accumulation_steps
            # if phase == "train":
            #     loss.backward()
            #     if (itr + 1 ) % self.accumulation_steps == 0:
            #         self.optimizer.step()
            #         self.optimizer.zero_grad()

            running_losses += [loss.item()]

            _, predictions = outputs['logits'].max(1)
            label = batch['labels']
            num_correct += (predictions == label).sum()
            num_samples += predictions.size(0)
            
        # epoch_loss = (running_loss * self.accumulation_steps) / total_batches
        epoch_loss = sum(running_losses) / len(running_losses)
        epoch_accuracy = (num_correct/num_samples).item()
        
        self.losses[phase].append(epoch_loss)
        self.accuracy[phase].append(epoch_accuracy)

        print("loss: %0.4f | accuracy: %0.4f" % (epoch_loss, epoch_accuracy))

        torch.cuda.empty_cache()
        return epoch_loss

    def start(self):
        for epoch in range(self.num_epochs):
            self.iterate(epoch, "train")
            state = {
                "epoch": epoch,
                "best_loss": self.best_loss,
                "state_dict": self.model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            }

            valid_loss = self.iterate(epoch, "valid")
            # self.scheduler.step(valid_loss)
            if valid_loss < self.best_loss:
                print("******** New optimal found, saving state ********")
                state["best_loss"] = self.best_loss = valid_loss
                nowtime = time.strftime("%H:%M:%S")
                torch.save(state, "./" + nowtime + "model.pth")
            print()

In [None]:
model_trainer = Trainer(model, train_loader, valid_loader, device)
model_trainer.start()

Epoch 0:   0%|          | 0/2194 [00:00<?, ?it/s]

[10:16:09] type: train epoch: 0


Epoch 0: 100%|██████████| 2194/2194 [23:12<00:00,  1.58it/s]
Epoch 0:   0%|          | 2/975 [00:00<01:26, 11.24it/s]

loss: nan | accuracy: 0.0665
[10:39:22] type: valid epoch: 0


Epoch 0: 100%|██████████| 975/975 [00:46<00:00, 21.12it/s]
Epoch 1:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[10:40:08] type: train epoch: 1


Epoch 1: 100%|██████████| 2194/2194 [23:25<00:00,  1.56it/s]
Epoch 1:   0%|          | 2/975 [00:00<01:15, 12.81it/s]

loss: nan | accuracy: 0.0664
[11:03:34] type: valid epoch: 1


Epoch 1: 100%|██████████| 975/975 [00:46<00:00, 21.16it/s]
Epoch 2:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[11:04:20] type: train epoch: 2


Epoch 2: 100%|██████████| 2194/2194 [23:24<00:00,  1.56it/s]
Epoch 2:   0%|          | 2/975 [00:00<01:23, 11.64it/s]

loss: nan | accuracy: 0.0661
[11:27:45] type: valid epoch: 2


Epoch 2: 100%|██████████| 975/975 [00:46<00:00, 21.18it/s]
Epoch 3:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[11:28:31] type: train epoch: 3


Epoch 3: 100%|██████████| 2194/2194 [23:23<00:00,  1.56it/s]
Epoch 3:   0%|          | 2/975 [00:00<01:14, 13.02it/s]

loss: nan | accuracy: 0.0669
[11:51:55] type: valid epoch: 3


Epoch 3: 100%|██████████| 975/975 [00:46<00:00, 21.17it/s]
Epoch 4:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[11:52:41] type: train epoch: 4


Epoch 4: 100%|██████████| 2194/2194 [23:26<00:00,  1.56it/s]
Epoch 4:   0%|          | 2/975 [00:00<01:14, 13.12it/s]

loss: nan | accuracy: 0.0664
[12:16:07] type: valid epoch: 4


Epoch 4: 100%|██████████| 975/975 [00:46<00:00, 21.09it/s]
Epoch 5:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[12:16:53] type: train epoch: 5


Epoch 5: 100%|██████████| 2194/2194 [23:23<00:00,  1.56it/s]
Epoch 5:   0%|          | 1/975 [00:00<01:53,  8.59it/s]

loss: nan | accuracy: 0.0660
[12:40:18] type: valid epoch: 5


Epoch 5: 100%|██████████| 975/975 [00:46<00:00, 21.11it/s]
Epoch 6:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[12:41:04] type: train epoch: 6


Epoch 6: 100%|██████████| 2194/2194 [23:19<00:00,  1.57it/s]
Epoch 6:   0%|          | 1/975 [00:00<01:38,  9.87it/s]

loss: nan | accuracy: 0.0660
[13:04:24] type: valid epoch: 6


Epoch 6: 100%|██████████| 975/975 [00:46<00:00, 21.13it/s]
Epoch 7:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0621

[13:05:10] type: train epoch: 7


Epoch 7: 100%|██████████| 2194/2194 [23:20<00:00,  1.57it/s]
Epoch 7:   0%|          | 1/975 [00:00<01:40,  9.68it/s]

loss: nan | accuracy: 0.0667
[13:28:31] type: valid epoch: 7


Epoch 7: 100%|██████████| 975/975 [00:45<00:00, 21.21it/s]
Epoch 8:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[13:29:17] type: train epoch: 8


Epoch 8: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]
Epoch 8:   0%|          | 2/975 [00:00<01:18, 12.42it/s]

loss: nan | accuracy: 0.0666
[13:52:39] type: valid epoch: 8


Epoch 8: 100%|██████████| 975/975 [00:46<00:00, 21.10it/s]
Epoch 9:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[13:53:25] type: train epoch: 9


Epoch 9: 100%|██████████| 2194/2194 [23:24<00:00,  1.56it/s]


loss: nan | accuracy: 0.0669


Epoch 9:   0%|          | 2/975 [00:00<01:16, 12.77it/s]

[14:16:50] type: valid epoch: 9


Epoch 9: 100%|██████████| 975/975 [00:46<00:00, 21.19it/s]
Epoch 10:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[14:17:36] type: train epoch: 10


Epoch 10: 100%|██████████| 2194/2194 [23:23<00:00,  1.56it/s]
Epoch 10:   0%|          | 2/975 [00:00<01:13, 13.26it/s]

loss: nan | accuracy: 0.0660
[14:40:59] type: valid epoch: 10


Epoch 10: 100%|██████████| 975/975 [00:46<00:00, 21.18it/s]
Epoch 11:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[14:41:45] type: train epoch: 11


Epoch 11: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]


loss: nan | accuracy: 0.0659


Epoch 11:   0%|          | 2/975 [00:00<01:15, 12.83it/s]

[15:05:08] type: valid epoch: 11


Epoch 11: 100%|██████████| 975/975 [00:46<00:00, 21.08it/s]
Epoch 12:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[15:05:55] type: train epoch: 12


Epoch 12: 100%|██████████| 2194/2194 [23:24<00:00,  1.56it/s]
Epoch 12:   0%|          | 2/975 [00:00<01:16, 12.67it/s]

loss: nan | accuracy: 0.0661
[15:29:19] type: valid epoch: 12


Epoch 12: 100%|██████████| 975/975 [00:46<00:00, 21.15it/s]
Epoch 13:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[15:30:06] type: train epoch: 13


Epoch 13: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]
Epoch 13:   0%|          | 2/975 [00:00<01:12, 13.43it/s]

loss: nan | accuracy: 0.0663
[15:53:29] type: valid epoch: 13


Epoch 13: 100%|██████████| 975/975 [00:46<00:00, 21.10it/s]
Epoch 14:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[15:54:15] type: train epoch: 14


Epoch 14: 100%|██████████| 2194/2194 [23:23<00:00,  1.56it/s]
Epoch 14:   0%|          | 2/975 [00:00<01:24, 11.49it/s]

loss: nan | accuracy: 0.0666
[16:17:39] type: valid epoch: 14


Epoch 14: 100%|██████████| 975/975 [00:46<00:00, 21.15it/s]
Epoch 15:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[16:18:25] type: train epoch: 15


Epoch 15: 100%|██████████| 2194/2194 [23:25<00:00,  1.56it/s]
Epoch 15:   0%|          | 2/975 [00:00<01:25, 11.34it/s]

loss: nan | accuracy: 0.0664
[16:41:51] type: valid epoch: 15


Epoch 15: 100%|██████████| 975/975 [00:46<00:00, 21.08it/s]
Epoch 16:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[16:42:37] type: train epoch: 16


Epoch 16: 100%|██████████| 2194/2194 [23:28<00:00,  1.56it/s]
Epoch 16:   0%|          | 2/975 [00:00<01:27, 11.10it/s]

loss: nan | accuracy: 0.0671
[17:06:06] type: valid epoch: 16


Epoch 16: 100%|██████████| 975/975 [00:46<00:00, 21.01it/s]
Epoch 17:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[17:06:52] type: train epoch: 17


Epoch 17: 100%|██████████| 2194/2194 [23:26<00:00,  1.56it/s]
Epoch 17:   0%|          | 2/975 [00:00<01:19, 12.18it/s]

loss: nan | accuracy: 0.0659
[17:30:19] type: valid epoch: 17


Epoch 17: 100%|██████████| 975/975 [00:46<00:00, 21.09it/s]
Epoch 18:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[17:31:05] type: train epoch: 18


Epoch 18: 100%|██████████| 2194/2194 [23:24<00:00,  1.56it/s]
Epoch 18:   0%|          | 1/975 [00:00<01:53,  8.60it/s]

loss: nan | accuracy: 0.0669
[17:54:30] type: valid epoch: 18


Epoch 18: 100%|██████████| 975/975 [00:46<00:00, 21.13it/s]
Epoch 19:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[17:55:16] type: train epoch: 19


Epoch 19: 100%|██████████| 2194/2194 [23:21<00:00,  1.56it/s]
Epoch 19:   0%|          | 2/975 [00:00<01:17, 12.62it/s]

loss: nan | accuracy: 0.0665
[18:18:39] type: valid epoch: 19


Epoch 19: 100%|██████████| 975/975 [00:46<00:00, 20.98it/s]
Epoch 20:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[18:19:25] type: train epoch: 20


Epoch 20: 100%|██████████| 2194/2194 [23:19<00:00,  1.57it/s]
Epoch 20:   0%|          | 2/975 [00:00<01:12, 13.43it/s]

loss: nan | accuracy: 0.0669
[18:42:45] type: valid epoch: 20


Epoch 20: 100%|██████████| 975/975 [00:45<00:00, 21.20it/s]
Epoch 21:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[18:43:32] type: train epoch: 21


Epoch 21: 100%|██████████| 2194/2194 [23:20<00:00,  1.57it/s]
Epoch 21:   0%|          | 2/975 [00:00<01:14, 13.00it/s]

loss: nan | accuracy: 0.0663
[19:06:52] type: valid epoch: 21


Epoch 21: 100%|██████████| 975/975 [00:46<00:00, 21.13it/s]
Epoch 22:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[19:07:39] type: train epoch: 22


Epoch 22: 100%|██████████| 2194/2194 [23:19<00:00,  1.57it/s]
Epoch 22:   0%|          | 1/975 [00:00<01:38,  9.87it/s]

loss: nan | accuracy: 0.0666
[19:30:58] type: valid epoch: 22


Epoch 22: 100%|██████████| 975/975 [00:46<00:00, 21.13it/s]
Epoch 23:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[19:31:44] type: train epoch: 23


Epoch 23: 100%|██████████| 2194/2194 [23:20<00:00,  1.57it/s]
Epoch 23:   0%|          | 2/975 [00:00<01:25, 11.38it/s]

loss: nan | accuracy: 0.0664
[19:55:04] type: valid epoch: 23


Epoch 23: 100%|██████████| 975/975 [00:46<00:00, 21.10it/s]
Epoch 24:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[19:55:51] type: train epoch: 24


Epoch 24: 100%|██████████| 2194/2194 [23:23<00:00,  1.56it/s]
Epoch 24:   0%|          | 2/975 [00:00<01:19, 12.28it/s]

loss: nan | accuracy: 0.0658
[20:19:14] type: valid epoch: 24


Epoch 24: 100%|██████████| 975/975 [00:46<00:00, 21.03it/s]
Epoch 25:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[20:20:00] type: train epoch: 25


Epoch 25: 100%|██████████| 2194/2194 [23:23<00:00,  1.56it/s]
Epoch 25:   0%|          | 2/975 [00:00<01:22, 11.81it/s]

loss: nan | accuracy: 0.0674
[20:43:25] type: valid epoch: 25


Epoch 25: 100%|██████████| 975/975 [00:46<00:00, 21.01it/s]
Epoch 26:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[20:44:11] type: train epoch: 26


Epoch 26: 100%|██████████| 2194/2194 [23:24<00:00,  1.56it/s]
Epoch 26:   0%|          | 2/975 [00:00<01:09, 13.96it/s]

loss: nan | accuracy: 0.0663
[21:07:35] type: valid epoch: 26


Epoch 26: 100%|██████████| 975/975 [00:46<00:00, 21.17it/s]
Epoch 27:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[21:08:21] type: train epoch: 27


Epoch 27: 100%|██████████| 2194/2194 [23:20<00:00,  1.57it/s]
Epoch 27:   0%|          | 2/975 [00:00<01:21, 11.88it/s]

loss: nan | accuracy: 0.0667
[21:31:42] type: valid epoch: 27


Epoch 27: 100%|██████████| 975/975 [00:46<00:00, 21.10it/s]
Epoch 28:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[21:32:28] type: train epoch: 28


Epoch 28: 100%|██████████| 2194/2194 [23:20<00:00,  1.57it/s]
Epoch 28:   0%|          | 2/975 [00:00<01:10, 13.80it/s]

loss: nan | accuracy: 0.0666
[21:55:49] type: valid epoch: 28


Epoch 28: 100%|██████████| 975/975 [00:46<00:00, 21.13it/s]
Epoch 29:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[21:56:35] type: train epoch: 29


Epoch 29: 100%|██████████| 2194/2194 [23:21<00:00,  1.57it/s]
Epoch 29:   0%|          | 2/975 [00:00<01:18, 12.43it/s]

loss: nan | accuracy: 0.0674
[22:19:57] type: valid epoch: 29


Epoch 29: 100%|██████████| 975/975 [00:46<00:00, 21.09it/s]
Epoch 30:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[22:20:43] type: train epoch: 30


Epoch 30: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]
Epoch 30:   0%|          | 2/975 [00:00<01:23, 11.72it/s]

loss: nan | accuracy: 0.0671
[22:44:06] type: valid epoch: 30


Epoch 30: 100%|██████████| 975/975 [00:46<00:00, 21.13it/s]
Epoch 31:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[22:44:52] type: train epoch: 31


Epoch 31: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]


loss: nan | accuracy: 0.0655


Epoch 31:   0%|          | 2/975 [00:00<01:21, 11.90it/s]

[23:08:14] type: valid epoch: 31


Epoch 31: 100%|██████████| 975/975 [00:46<00:00, 21.02it/s]
Epoch 32:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[23:09:01] type: train epoch: 32


Epoch 32: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]
Epoch 32:   0%|          | 2/975 [00:00<01:13, 13.27it/s]

loss: nan | accuracy: 0.0660
[23:32:23] type: valid epoch: 32


Epoch 32: 100%|██████████| 975/975 [00:46<00:00, 21.15it/s]
Epoch 33:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[23:33:09] type: train epoch: 33


Epoch 33: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]
Epoch 33:   0%|          | 2/975 [00:00<01:14, 13.03it/s]

loss: nan | accuracy: 0.0668
[23:56:32] type: valid epoch: 33


Epoch 33: 100%|██████████| 975/975 [00:46<00:00, 21.17it/s]
Epoch 34:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[23:57:18] type: train epoch: 34


Epoch 34: 100%|██████████| 2194/2194 [23:22<00:00,  1.56it/s]
Epoch 34:   0%|          | 2/975 [00:00<01:21, 11.91it/s]

loss: nan | accuracy: 0.0666
[00:20:41] type: valid epoch: 34


Epoch 34: 100%|██████████| 975/975 [00:46<00:00, 21.08it/s]
Epoch 35:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[00:21:27] type: train epoch: 35


Epoch 35: 100%|██████████| 2194/2194 [23:18<00:00,  1.57it/s]


loss: nan | accuracy: 0.0652


Epoch 35:   0%|          | 2/975 [00:00<01:19, 12.24it/s]

[00:44:46] type: valid epoch: 35


Epoch 35: 100%|██████████| 975/975 [00:46<00:00, 21.12it/s]
Epoch 36:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[00:45:32] type: train epoch: 36


Epoch 36: 100%|██████████| 2194/2194 [23:19<00:00,  1.57it/s]
Epoch 36:   0%|          | 2/975 [00:00<01:18, 12.46it/s]

loss: nan | accuracy: 0.0659
[01:08:52] type: valid epoch: 36


Epoch 36: 100%|██████████| 975/975 [00:46<00:00, 21.06it/s]
Epoch 37:   0%|          | 0/2194 [00:00<?, ?it/s]

loss: nan | accuracy: 0.0622

[01:09:39] type: train epoch: 37


Epoch 37:  14%|█▍        | 311/2194 [03:18<20:05,  1.56it/s]

In [None]:
# PLOT TRAINING
losses = model_trainer.losses
accuracy = model_trainer.accuracy

def plot(scores, name):
    plt.figure(figsize=(15,5))
    plt.plot(range(len(scores["train"])), scores["train"], label=f'train {name}')
    plt.plot(range(len(scores["train"])), scores["valid"], label=f'valid {name}')
    plt.title(f'{name} plot')
    plt.xlabel('Epoch')
    plt.ylabel(f'{name}')
    plt.legend()
    plt.show()

plot(losses, "loss")
plot(accuracy, "accuracy")