In [1]:
!pip install -r requirements.txt

You should consider upgrading via the '/home/zelcakok/Desktop/Dev/COMP5423-emotion-classification/venv/bin/python -m pip install --upgrade pip' command.

In [2]:
import random
import numpy as np
import torch

# Seed every random number generator used by this notebook with one shared
# value so that a fresh "Restart & Run All" reproduces the same run.
seed_val = 42
random.seed(seed_val)        # Python's built-in RNG
np.random.seed(seed_val)     # NumPy's global RNG
torch.manual_seed(seed_val)  # PyTorch default generator
# manual_seed_all seeds every GPU, whereas manual_seed only seeds the current
# one. It is silently ignored when CUDA is not available.
torch.cuda.manual_seed_all(seed_val)

import pandas as pd
from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup, BertConfig
from torch.optim import AdamW

from utils import split_tensor_datasets, get_training_dataset_loader, get_validate_dataset_loader
from training_kit import TrainingKit
from trainer import Trainer
from data_preprocessor import DataProcessor

def debug_params(model):
    """Print a three-part summary of the named parameters of ``model``.

    The report starts with the total number of named parameters, then lists
    the name and shape of the parameters at positions 0-4, 5-20 and the last
    four, under the headings "Embedding Layer", "First Transformer" and
    "Output Layer" respectively.

    NOTE(review): the slice boundaries are hard-coded; presumably they match
    the parameter ordering of the BERT model loaded in this notebook --
    confirm before reusing with another architecture.

    Args:
        model: object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs whose tensors support ``.size()``.

    Returns:
        None. Everything is written to stdout.
    """
    named = list(model.named_parameters())
    row_template = "{:<55} {:>12}"

    print('The BERT model has {:} different named parameters.\n'.format(
        len(named)))

    # (heading, parameters to list) for each section, in print order.
    sections = (
        ('==== Embedding Layer ====\n', named[:5]),
        ('\n==== First Transformer ====\n', named[5:21]),
        ('\n==== Output Layer ====\n', named[-4:]),
    )
    for heading, entries in sections:
        print(heading)
        for name, tensor in entries:
            print(row_template.format(name, str(tuple(tensor.size()))))


# Configs
# Tunable settings for this run; every later step in this cell reads them.
epochs = 3  # passed to Trainer and used to size the LR schedule (total_steps)
batch_size = 5  # batch size handed to both the training and validation loaders
rows_per_batch = 50  # multiplied by batch_size to form TrainingKit's row_size (5 * 50 = 250)
columns = ["Sentence", "Emotion"]  # column names assigned to the ';'-separated fields

# Prepare the datasets
# NOTE(review): header=0 makes pandas treat the first line of the file as a
# header row, which `names` then replaces. If train_data.txt has no header
# line, its first sample is silently dropped -- TODO confirm against the file.
df = pd.read_csv('data/train_data.txt', header=0, names=columns, sep=";")
# Project-local preprocessing; the result replaces the raw frame under the same name.
df = DataProcessor().process(df, columns)

# Init the training kit
# TrainingKit is project-local; it is given the processed frame, the names of
# the label ("Emotion") and text ("Sentence") columns, and a row_size of
# batch_size * rows_per_batch.
# NOTE(review): the recorded output of this cell contains a tokenizer warning
# that `max_length` was supplied without `truncation=True`; presumably the
# tokenizer call lives inside TrainingKit -- confirm and pass truncation=True there.
training_kit = TrainingKit(
    df, 
    feat_col_name="Emotion", 
    data_col_name="Sentence", 
    row_size=batch_size * rows_per_batch,
)

# Build the tensor dataset and split it into training and validation parts.
# presumably ratio=0.7 means 70% training / 30% validation -- TODO confirm in utils.
tensor_ds = training_kit.get_tensor_dataset()
train_ds, val_ds = split_tensor_datasets(tensor_ds, ratio=0.7)

# Both loaders use the same batch size configured above.
train_dataloader = get_training_dataset_loader(train_ds, batch_size=batch_size)
val_dataloader = get_validate_dataset_loader(val_ds, batch_size=batch_size)

# Prepare the model
# Load pretrained BERT with a sequence-classification head sized to the number
# of entries in training_kit.features.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
    num_labels=len(training_kit.features),  # The number of output labels.
    output_attentions=False,  # Whether the model returns attentions weights.
    output_hidden_states=False,  # Whether the model returns all hidden-states.
)

# AdamW over all model parameters.
optimizer = AdamW(model.parameters(),
                  lr=5e-5,  # learning rate used for this run
                  eps=1e-8  # Adam epsilon
                  )

# Schedule length = len(train_dataloader) * epochs.
# presumably len(train_dataloader) is the number of batches per epoch and the
# Trainer steps the scheduler once per batch -- TODO confirm in trainer.py.
total_steps = len(train_dataloader) * epochs
# Linear schedule with no warm-up phase (num_warmup_steps=0) over total_steps steps.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,  # Default value in run_glue.py
                                            num_training_steps=total_steps)

# Hand the model, optimizer, scheduler and both loaders to the project-local
# Trainer, requesting CPU execution.
# NOTE(review): the recorded output of this cell ends with
# "RuntimeError: Found no NVIDIA driver on your system" even though
# device="cpu" is passed here; something still appears to initialise CUDA --
# confirm where (Trainer / utils / TrainingKit) before relying on this cell.
trainer = Trainer(model, optimizer, scheduler,
                  train_dataloader, val_dataloader, epochs, device="cpu")

# Run the training loop defined by Trainer.
trainer.train()

# Write the model to the local "model" directory.
model.save_pretrained("model")

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package wordnet to /home/zelcakok/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializin

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx