In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
BASE_DIR = '/content/drive/MyDrive/Winter 2022/EECS 498/TweetSentimentDetector/'

In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 5.2 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 40.5 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 6.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 46.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 37.3 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml


In [None]:
from transformers import BertTokenizer, BertModel
import torch
from torch import nn
from torch.optim import Adam
from tqdm import tqdm
import numpy as np
import pandas as pd
import pickle

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
labels = {
    0: 'sadness',
    1: 'joy',
    2: 'love',
    3: 'anger',
    4: 'fear',
    5: 'surprise'
}

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.labels = [label for label in df['label']]
        self.texts = [tokenizer(text, padding='max_length', max_length = 512, truncation=True, return_tensors="pt") for text in df['text']]
    
    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.texts[idx]

    def __getitem__(self, idx):

        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_texts, batch_y

    

In [None]:
df_train = df = pd.read_csv('/content/drive/MyDrive/Winter 2022/EECS 498/TweetSentimentDetector/data/training.csv')
df_val = df = pd.read_csv('/content/drive/MyDrive/Winter 2022/EECS 498/TweetSentimentDetector/data/validation.csv')
df_test = df = pd.read_csv('/content/drive/MyDrive/Winter 2022/EECS 498/TweetSentimentDetector/data/test.csv')

In [None]:
print(len(df_train),len(df_val), len(df_test))

16000 2000 2000


In [None]:
class BertClassifier(nn.Module):

    def __init__(self, dropout=0.5):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained('bert-base-cased')

        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 6)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer

In [None]:
def train(model, train_data, val_data, learning_rate, epochs, starting_epoch=0):
    train, val = Dataset(train_data), Dataset(val_data)

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    if use_cuda:
            model = model.cuda()
            criterion = criterion.cuda()

    for epoch_num in range(starting_epoch, epochs):
            if epoch_num > 0:
                model = BertClassifier()
                model.load_state_dict(torch.load(BASE_DIR + 'models_no_freeze/model-' + str(epoch_num - 1)))
                model = model.cuda()
                
                with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/train_loss.pkl', 'rb')) as f:
                    train_losses = pickle.load(f)
                with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/train_accuracy.pkl', 'rb')) as f:
                    train_accuracies = pickle.load(f)
                with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/val_loss.pkl', 'rb')) as f:
                    val_losses = pickle.load(f)
                with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/val_accuracy.pkl', 'rb')) as f:
                    val_accuracies = pickle.load(f)
              
            print('train_losses:', train_losses)
            print('train_accuracies:', train_accuracies)
            print('val_losses:', val_losses)
            print('val_accuracies:', val_accuracies)
            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                
                batch_loss = criterion(output, train_label)
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label)
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            torch.save(model.state_dict(), BASE_DIR + 'models_no_freeze/model-' + str(epoch_num))

            # save losses and accuracies
            train_losses.append(total_loss_train / len(train_data))
            train_accuracies.append(total_acc_train / len(train_data))
            val_losses.append(total_loss_val / len(val_data))
            val_accuracies.append(total_acc_val / len(val_data))
            with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/train_loss.pkl', 'wb')) as f:
                pickle.dump(train_losses, f, protocol=pickle.HIGHEST_PROTOCOL)
            with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/train_accuracy.pkl', 'wb')) as f:
                pickle.dump(train_accuracies, f, protocol=pickle.HIGHEST_PROTOCOL)
            with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/val_loss.pkl', 'wb')) as f:
                pickle.dump(val_losses, f, protocol=pickle.HIGHEST_PROTOCOL)
            with (open(BASE_DIR + 'losses_and_accuracies_no_freeze/val_accuracy.pkl', 'wb')) as f:
                pickle.dump(val_accuracies, f, protocol=pickle.HIGHEST_PROTOCOL)
            
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
                | Train Accuracy: {total_acc_train / len(train_data): .3f} \
                | Val Loss: {total_loss_val / len(val_data): .3f} \
                | Val Accuracy: {total_acc_val / len(val_data): .3f}')        

In [None]:
EPOCHS = 10
model = BertClassifier()
LR = 1e-6
              
train(model, df_train, df_val, LR, EPOCHS)

Downloading:   0%|          | 0.00/416M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


train_losses: []
train_accuracies: []
val_losses: []
val_accuracies: []


100%|██████████| 8000/8000 [59:08<00:00,  2.25it/s]


Epochs: 1 | Train Loss:  0.562                 | Train Accuracy:  0.621                 | Val Loss:  0.294                 | Val Accuracy:  0.821


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


train_losses: [0.5618685432574712]
train_accuracies: [0.620875]
val_losses: [0.2936941531486809]
val_accuracies: [0.8205]


 52%|█████▏    | 4135/8000 [30:35<28:42,  2.24it/s]