# Use BERTweet Representations with LogisticRegression Softmax Classifier

In [2]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

import dataset
import vsm
import sst

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
TWITTER = 2
TWITTER_AIRLINES = 3
TWITTER_APPLE = 4

In [4]:
twitter_train, twitter_validate, twitter_test =  dataset.dataset_reader(TWITTER)
[twitter_train, twitter_validate, twitter_test] = list(map(lambda ds : dataset.prune_columns(2, ds), [twitter_train, twitter_validate, twitter_test]))

In [5]:
# Unique values of sentiment
twitter_sentiment_labels = twitter_train['sentiment'].unique()

In [6]:
twitter_train.size, twitter_validate.size

(261385, 5000)

In [6]:
%%time
bertweet_experiment1500 = sst.experiment(
    twitter_train[:1500], # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.576     0.198     0.294       172
    Negative      0.521     0.695     0.596       266
     Neutral      0.575     0.512     0.542       285
    Positive      0.491     0.588     0.535       277

    accuracy                          0.528      1000
   macro avg      0.541     0.498     0.492      1000
weighted avg      0.538     0.528     0.512      1000

CPU times: user 52min 56s, sys: 6.29 s, total: 53min 2s
Wall time: 13min 42s


In [7]:
%%time
bertweet_experiment3000 = sst.experiment(
    twitter_train[:3000], # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.545     0.279     0.369       172
    Negative      0.572     0.703     0.631       266
     Neutral      0.603     0.544     0.572       285
    Positive      0.552     0.653     0.598       277

    accuracy                          0.571      1000
   macro avg      0.568     0.545     0.543      1000
weighted avg      0.571     0.571     0.560      1000

CPU times: user 1h 18min 47s, sys: 8.88 s, total: 1h 18min 55s
Wall time: 20min 25s


In [8]:
%%time
bertweet_experiment6000 = sst.experiment(
    twitter_train[:6000], # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1500]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.579     0.256     0.355       172
    Negative      0.589     0.756     0.662       266
     Neutral      0.601     0.533     0.565       285
    Positive      0.564     0.671     0.613       277

    accuracy                          0.583      1000
   macro avg      0.583     0.554     0.549      1000
weighted avg      0.584     0.583     0.568      1000

CPU times: user 2h 16min 44s, sys: 16 s, total: 2h 17min
Wall time: 35min 38s


In [9]:
%%time
bertweet_experiment12000 = sst.experiment(
    twitter_train[:12000], # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:2000]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.565     0.302     0.394       172
    Negative      0.604     0.767     0.675       266
     Neutral      0.617     0.537     0.574       285
    Positive      0.584     0.679     0.628       277

    accuracy                          0.597      1000
   macro avg      0.592     0.571     0.568      1000
weighted avg      0.595     0.597     0.585      1000

CPU times: user 4h 22min 45s, sys: 41.2 s, total: 4h 23min 26s
Wall time: 1h 8min 20s


In [None]:
%%time
bertweet_experiment_full = sst.experiment(
    twitter_train, # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate],
    vectorize=False)

In [None]:
bertweet_experiment_full.keys()

In [None]:
bertweet_experiment_full['scores']

In [None]:
bertweet_experiment_full['metric']

In [None]:
bertweet_experiment_full['model']

# Test BERT trained on Tweets on test set

In [None]:
def predict_one_bert(text):
    # List of tokenized examples:
    X = [bertweet_experiment_full['phi'](text)]
    # Standard `predict` step on a list of lists of str:
    preds = bertweet_experiment_full['model'].predict(X)
    # Be sure to return the only member of the predictions,
    # rather than the singleton list:
    return preds[0]

In [None]:
# %% time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [None]:
import importlib
importlib.reload(sst)

In [None]:
%%time
bertweet_test = sst.evaluate(
    bertweet_experiment_full['model'],
    bertweet_experiment_full['phi'],
    assess_dataframes=[twitter_test],
    vectorizer=bertweet_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

In [None]:
type(bertweet_test['predictions'][0])

In [20]:
predictions_fname ='results/BERTweet_predictions_on_twitter_test.csv'
df = bertweet_test['predictions'][0]
pd.DataFrame(df).to_csv(predictions_fname)

In [None]:
encoding_fname ='results/BERTweet_encodings_on_twitter_test.csv'
encoded_test = bertweet_test['assess_datasets'][0]
pd.DataFrame(encoded_test).to_csv(encoding_fname)

In [None]:
predictions_df = pd.DataFrame(df)
predictions_df = predictions_df.set_index(twitter_test.index)
predictions_df

In [None]:
twitter_test['BERTweet_sentiment'] = predictions_df

In [None]:
twitter_test

In [None]:
test_predictions_fname ='results/BERTweet_predictions_added_to_twitter_test.csv'
twitter_test.to_csv(test_predictions_fname)

In [None]:
correct = twitter_test[twitter_test['sentiment'] == twitter_test['BERTweet_sentiment']]

In [None]:
correct

In [None]:
incorrect = twitter_test[twitter_test['sentiment'] != twitter_test['BERTweet_sentiment']]

In [None]:
incorrect

In [None]:
irrelevant = twitter_test[twitter_test['sentiment'] == 'Irrelevant']
irrelevant

# Save Model

In [None]:
import pickle
model_fname = 'models/BERTweet_twitter_model.sav'
pickle.dump(bertweet_experiment_full['model'], open(model_fname, 'wb'))