# Use BERTweet Representations with LogisticRegression Softmax Classifier

In [1]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

import dataset
import vsm
import sst

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Numeric dataset identifiers. TWITTER is the id handed to dataset.dataset_reader
# when the splits are loaded.
# NOTE(review): TWITTER_AIRLINES and TWITTER_APPLE are not referenced in the cells
# visible here -- presumably ids of sibling datasets; confirm against dataset.py.
TWITTER = 2
TWITTER_AIRLINES = 3
TWITTER_APPLE = 4

In [3]:
# Read the three splits returned by the project reader for the TWITTER dataset id.
twitter_train, twitter_validate, twitter_test = dataset.dataset_reader(TWITTER)

# Apply the same column pruning to every split. The list is fully built from the
# un-pruned frames before the names are rebound.
# NOTE(review): the literal 2 equals TWITTER -- confirm whether prune_columns
# expects the dataset id here, and if so pass the constant instead.
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(2, split)
    for split in (twitter_train, twitter_validate, twitter_test)
]

In [4]:
# Distinct values found in the 'sentiment' column of the training split.
# NOTE(review): this variable is not read again in the cells visible here.
twitter_sentiment_labels = twitter_train['sentiment'].unique()

In [5]:
# NOTE: DataFrame.size is the total number of cells (rows x columns), not the
# number of rows; use len(frame) or frame.shape[0] for the example count.
twitter_train.size, twitter_validate.size

(261385, 5000)

In [6]:
%%time
# Learning-curve step: fit on a 1,500-row prefix of the training split.
bertweet_experiment1500 = sst.experiment(
    twitter_train[:1500], # first 1,500 rows of the training split
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    # NOTE(review): presumably disabled because bert_tweet_phi already yields
    # numeric feature vectors -- confirm in sst.py.
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.576     0.198     0.294       172
    Negative      0.521     0.695     0.596       266
     Neutral      0.575     0.512     0.542       285
    Positive      0.491     0.588     0.535       277

    accuracy                          0.528      1000
   macro avg      0.541     0.498     0.492      1000
weighted avg      0.538     0.528     0.512      1000

CPU times: user 52min 56s, sys: 6.29 s, total: 53min 2s
Wall time: 13min 42s


In [7]:
%%time
# Learning-curve step: fit on a 3,000-row prefix of the training split.
bertweet_experiment3000 = sst.experiment(
    twitter_train[:3000], # first 3,000 rows of the training split
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    # NOTE(review): presumably disabled because bert_tweet_phi already yields
    # numeric feature vectors -- confirm in sst.py.
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.545     0.279     0.369       172
    Negative      0.572     0.703     0.631       266
     Neutral      0.603     0.544     0.572       285
    Positive      0.552     0.653     0.598       277

    accuracy                          0.571      1000
   macro avg      0.568     0.545     0.543      1000
weighted avg      0.571     0.571     0.560      1000

CPU times: user 1h 18min 47s, sys: 8.88 s, total: 1h 18min 55s
Wall time: 20min 25s


In [8]:
%%time
# Learning-curve step: fit on a 6,000-row prefix of the training split.
bertweet_experiment6000 = sst.experiment(
    twitter_train[:6000], # first 6,000 rows of the training split
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    # The validation split has only 1,000 rows (the report's support column sums
    # to 1000), so the previous [:1500] bound was silently truncated. Spell out
    # the bound that actually applies, matching the smaller experiments.
    assess_dataframes=[twitter_validate[:1000]],
    # NOTE(review): presumably disabled because bert_tweet_phi already yields
    # numeric feature vectors -- confirm in sst.py.
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.579     0.256     0.355       172
    Negative      0.589     0.756     0.662       266
     Neutral      0.601     0.533     0.565       285
    Positive      0.564     0.671     0.613       277

    accuracy                          0.583      1000
   macro avg      0.583     0.554     0.549      1000
weighted avg      0.584     0.583     0.568      1000

CPU times: user 2h 16min 44s, sys: 16 s, total: 2h 17min
Wall time: 35min 38s


In [9]:
%%time
# Learning-curve step: fit on a 12,000-row prefix of the training split.
bertweet_experiment12000 = sst.experiment(
    twitter_train[:12000], # first 12,000 rows of the training split
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    # The validation split has only 1,000 rows (the report's support column sums
    # to 1000), so the previous [:2000] bound was silently truncated. Spell out
    # the bound that actually applies, matching the smaller experiments.
    assess_dataframes=[twitter_validate[:1000]],
    # NOTE(review): presumably disabled because bert_tweet_phi already yields
    # numeric feature vectors -- confirm in sst.py.
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.565     0.302     0.394       172
    Negative      0.604     0.767     0.675       266
     Neutral      0.617     0.537     0.574       285
    Positive      0.584     0.679     0.628       277

    accuracy                          0.597      1000
   macro avg      0.592     0.571     0.568      1000
weighted avg      0.595     0.597     0.585      1000

CPU times: user 4h 22min 45s, sys: 41.2 s, total: 4h 23min 26s
Wall time: 1h 8min 20s


In [6]:
%%time
# Final run: fit on the whole training split and assess on the whole validation
# split. The recorded wall time for this cell is over four hours.
bertweet_experiment_full = sst.experiment(
    twitter_train, # every row of the training split
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate],
    # NOTE(review): presumably disabled because bert_tweet_phi already yields
    # numeric feature vectors -- confirm in sst.py.
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.592     0.355     0.444       172
    Negative      0.652     0.789     0.714       266
     Neutral      0.631     0.565     0.596       285
    Positive      0.591     0.682     0.633       277

    accuracy                          0.621      1000
   macro avg      0.617     0.598     0.597      1000
weighted avg      0.619     0.621     0.612      1000

CPU times: user 12h 3min 34s, sys: 34min 18s, total: 12h 37min 52s
Wall time: 4h 20min 43s


In [7]:
# List the entries stored in the experiment result dictionary.
bertweet_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [8]:
# Stored score list; the single value matches the macro-avg F1 printed in the
# full-run classification report.
bertweet_experiment_full['scores']

[0.5968460508410257]

In [9]:
# Name of the scoring metric recorded with the experiment (presumably the one
# used to compute 'scores' -- confirm in sst.py).
bertweet_experiment_full['metric']

'safe_macro_f1'

In [10]:
# Show the fitted estimator.
# NOTE(review): the repr reports multi_class='ovr' with the liblinear solver,
# i.e. one-vs-rest rather than a multinomial (softmax) model, despite the
# fit_softmax_classifier name -- confirm this is intended.
bertweet_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

# Evaluate the BERTweet-based classifier on the test set

In [11]:
def predict_one_bert(text, experiment=None):
    """Predict the label of a single text with a fitted experiment.

    Parameters
    ----------
    text
        The text to classify; passed unchanged to the experiment's feature
        function.
    experiment : dict, optional
        Mapping that provides a feature function under 'phi' and a fitted
        estimator with a `predict` method under 'model'. When omitted, the
        notebook-level `bertweet_experiment_full` is used, which keeps the
        original one-argument call working.

    Returns
    -------
    The single prediction the model produces for `text`.
    """
    if experiment is None:
        # Looked up at call time so the function can be defined before the
        # experiment cell has been (re-)run.
        experiment = bertweet_experiment_full
    # `predict` works on a batch, so wrap the one featurized example in a list.
    features = [experiment['phi'](text)]
    predictions = experiment['model'].predict(features)
    # Unwrap the singleton batch so callers get the bare label.
    return predictions[0]

In [12]:
# Disabled row-by-row alternative: label every test tweet via predict_one_bert.
# NOTE(review): test-set predictions are obtained with sst.evaluate in a later
# cell instead. If this is re-enabled, the cell magic must be spelled `%%time`
# and must be the first line of the cell.
# %% time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [13]:
# Re-import sst so edits made to sst.py on disk take effect without restarting
# the kernel. Objects created before the reload keep using the old code.
import importlib
importlib.reload(sst)

<module 'sst' from '/home/scpdxcs/repos/CS224-final-project/sst.py'>

In [14]:
%%time
# Score the estimator fitted on the full training split against the test split,
# reusing the feature function stored with that experiment.
bertweet_test = sst.evaluate(
    bertweet_experiment_full['model'],
    bertweet_experiment_full['phi'],
    assess_dataframes=[twitter_test],
    # Vectorizer entry recorded with the experiment's first assessment dataset.
    vectorizer=bertweet_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

              precision    recall  f1-score   support

  Irrelevant      0.508     0.300     0.377      3976
    Negative      0.627     0.735     0.677      6700
     Neutral      0.568     0.523     0.545      5523
    Positive      0.574     0.659     0.613      6206

    accuracy                          0.584     22405
   macro avg      0.569     0.554     0.553     22405
weighted avg      0.577     0.584     0.574     22405

CPU times: user 4h 57min 57s, sys: 13min 31s, total: 5h 11min 28s
Wall time: 1h 40min 18s


In [15]:
# Check what kind of container holds the first set of test predictions.
type(bertweet_test['predictions'][0])

numpy.ndarray

In [16]:
predictions_fname ='results/BERTweet_predictions_on_twitter_test.csv'
# Create the output folder on a fresh checkout; to_csv does not create missing
# parent directories.
os.makedirs(os.path.dirname(predictions_fname), exist_ok=True)
# NOTE: despite its name, `df` is the numpy array of predicted labels (see the
# type check above). The name is kept because a later cell reads it.
df = bertweet_test['predictions'][0]
pd.DataFrame(df).to_csv(predictions_fname)

In [17]:
# Persist the first assessment dataset returned by sst.evaluate.
encoding_fname ='results/BERTweet_encodings_on_twitter_test.csv'
# NOTE(review): in the experiment result, assess_datasets[0] is indexed with
# ['vectorizer'], so this is presumably a dict rather than a plain matrix.
# Confirm the CSV holds complete feature vectors and not stringified (possibly
# abbreviated) arrays.
encoded_test = bertweet_test['assess_datasets'][0]
pd.DataFrame(encoded_test).to_csv(encoding_fname)

In [18]:
# One-column frame of predicted labels, labelled with the test split's own row
# index so it lines up with twitter_test row for row.
predictions_df = pd.DataFrame(df, index=twitter_test.index)
predictions_df

Unnamed: 0,0
3,Positive
7,Neutral
8,Positive
12,Neutral
13,Neutral
...,...
74657,Neutral
74661,Neutral
74662,Neutral
74671,Neutral


In [19]:
# Attach the predicted labels (the frame's only column, labelled 0) to the test
# split; values are aligned on the shared row index.
twitter_test['BERTweet_sentiment'] = predictions_df[0]

In [20]:
# Display the test split, now carrying the BERTweet_sentiment column.
twitter_test

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,BERTweet_sentiment
3,twitter_sentiment,1,im coming on borderlands and i will murder you...,Positive,2401,Positive
7,twitter_sentiment,2,So I spent a couple of hours doing something f...,Positive,2402,Neutral
8,twitter_sentiment,3,So I spent a few hours doing something for fun...,Positive,2402,Positive
12,twitter_sentiment,4,"Rock-Hard La Varlope, RARE & POWERFUL, HANDSOM...",Neutral,2403,Neutral
13,twitter_sentiment,5,"Rock-Hard La Varlope, RARE & POWERFUL, HANDSOM...",Neutral,2403,Neutral
...,...,...,...,...,...,...
74657,twitter_sentiment,22401,really doesn't look bad btw!.. net.com/google-...,Negative,9196,Neutral
74661,twitter_sentiment,22402,Nvidia doesn’t intend to give away its 2017 ad...,Neutral,9197,Neutral
74662,twitter_sentiment,22403,Nvidia therefore doesn ’ t want to give up its...,Neutral,9197,Neutral
74671,twitter_sentiment,22404,t let Elim go unnoticed.... NVIDIA Highlights ...,Positive,9199,Neutral


In [21]:
# Save the test split together with its predicted-label column.
test_predictions_fname ='results/BERTweet_predictions_added_to_twitter_test.csv'
twitter_test.to_csv(test_predictions_fname)

In [22]:
# Rows where the predicted label agrees with the gold sentiment.
correct = twitter_test.loc[
    twitter_test['sentiment'].eq(twitter_test['BERTweet_sentiment'])
]

In [23]:
# Display the correctly classified test rows.
correct

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,BERTweet_sentiment
3,twitter_sentiment,1,im coming on borderlands and i will murder you...,Positive,2401,Positive
8,twitter_sentiment,3,So I spent a few hours doing something for fun...,Positive,2402,Positive
12,twitter_sentiment,4,"Rock-Hard La Varlope, RARE & POWERFUL, HANDSOM...",Neutral,2403,Neutral
13,twitter_sentiment,5,"Rock-Hard La Varlope, RARE & POWERFUL, HANDSOM...",Neutral,2403,Neutral
15,twitter_sentiment,6,"Rock-Hard La Vita, RARE BUT POWERFUL, HANDSOME...",Neutral,2403,Neutral
...,...,...,...,...,...,...
74626,twitter_sentiment,22393,THE @NVIDIAGeForce The hell with you but Nvidi...,Negative,9191,Negative
74629,twitter_sentiment,22394,Save $500 on the AERO 15 OLED - Award-winning ...,Neutral,9192,Neutral
74633,twitter_sentiment,22395,than $500 on the AERO VA OLED - Award winning ...,Neutral,9192,Neutral
74661,twitter_sentiment,22402,Nvidia doesn’t intend to give away its 2017 ad...,Neutral,9197,Neutral


In [24]:
# Rows where the predicted label differs from the gold sentiment.
incorrect = twitter_test.loc[
    twitter_test['sentiment'].ne(twitter_test['BERTweet_sentiment'])
]

In [25]:
# Display the misclassified test rows.
incorrect

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,BERTweet_sentiment
7,twitter_sentiment,2,So I spent a couple of hours doing something f...,Positive,2402,Neutral
43,twitter_sentiment,16,Check out this epic streamer!.,Neutral,2408,Irrelevant
45,twitter_sentiment,17,Check out our epic streamer!.,Neutral,2408,Irrelevant
51,twitter_sentiment,18,Blaming Sight for Tardiness! A little bit of b...,Neutral,2409,Irrelevant
53,twitter_sentiment,19,all,Neutral,2409,Positive
...,...,...,...,...,...,...
74654,twitter_sentiment,22399,??????????????????????????????????????????????...,Negative,9196,Positive
74655,twitter_sentiment,22400,Cheap will mean better<unk>!.. techsall.com/go...,Negative,9196,Positive
74657,twitter_sentiment,22401,really doesn't look bad btw!.. net.com/google-...,Negative,9196,Neutral
74671,twitter_sentiment,22404,t let Elim go unnoticed.... NVIDIA Highlights ...,Positive,9199,Neutral


In [26]:
# Test rows whose gold label is 'Irrelevant' (the class with the lowest recall
# in the test report), shown with the model's prediction for each.
irrelevant = twitter_test.loc[twitter_test['sentiment'].eq('Irrelevant')]
irrelevant

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,BERTweet_sentiment
102,twitter_sentiment,35,Appreciate the (sonic) concepts / praxis Valen...,Irrelevant,2418,Irrelevant
126,twitter_sentiment,41,Loving these new @GhostLifestyle cans!! Anyone...,Irrelevant,2422,Positive
129,twitter_sentiment,42,Loving these new @GhostLifestyle cans!! Anyone...,Irrelevant,2422,Positive
139,twitter_sentiment,45,How the hell are we already into Halloween mon...,Irrelevant,2424,Neutral
222,twitter_sentiment,66,I'm in this @CBP video. @DHS_Wolf deceptively ...,Irrelevant,2438,Positive
...,...,...,...,...,...,...
73966,twitter_sentiment,22179,2010 I know this asset they are referring to (...,Irrelevant,9071,Irrelevant
73993,twitter_sentiment,22190,"Oh, yes, I used to work with these guys. Secon...",Irrelevant,9077,Positive
73995,twitter_sentiment,22191,"Oh, yeah, I used to work with these guys. Thir...",Irrelevant,9077,Positive
74038,twitter_sentiment,22199,9 Good idea for them. This is all based on ear...,Irrelevant,9085,Neutral


# Save Model

In [27]:
import pickle

model_fname = 'models/BERTweet_twitter_model.sav'
# Create the output folder on a fresh checkout; open() does not create missing
# parent directories.
os.makedirs(os.path.dirname(model_fname), exist_ok=True)
# The context manager closes (and therefore flushes) the file even if dump
# raises; the previous bare open() left the handle to the garbage collector.
with open(model_fname, 'wb') as model_file:
    pickle.dump(bertweet_experiment_full['model'], model_file)
# NOTE: only unpickle this file from a trusted location; pickle.load can execute
# arbitrary code.