# Use BERTweet Representations with a LogisticRegression Softmax Classifier

In [1]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score


import dataset
import vsm
import sst

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Numeric dataset identifiers handed to dataset.dataset_reader in the loading cells.
TWITTER = 2
TWITTER_AIRLINES = 3
TWITTER_APPLE = 4

In [3]:
# Read the train / validate / test splits of the general Twitter dataset.
twitter_train, twitter_validate, twitter_test = dataset.dataset_reader(TWITTER)
# NOTE(review): the literal 2 equals TWITTER -- presumably prune_columns takes
# the dataset id as its first argument; confirm against dataset.prune_columns.
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(2, split)
    for split in (twitter_train, twitter_validate, twitter_test)
]

In [4]:
# Read the train / validate / test splits of the airline tweets dataset.
airline_train, airline_validate, airline_test = dataset.dataset_reader(TWITTER_AIRLINES)
# NOTE(review): the literal 3 equals TWITTER_AIRLINES -- presumably prune_columns
# takes the dataset id as its first argument; confirm against dataset.prune_columns.
airline_train, airline_validate, airline_test = [
    dataset.prune_columns(3, split)
    for split in (airline_train, airline_validate, airline_test)
]

In [5]:
# Read the train / validate / test splits of the Apple tweets dataset.
apple_train, apple_validate, apple_test = dataset.dataset_reader(TWITTER_APPLE)
# NOTE(review): the literal 4 equals TWITTER_APPLE -- presumably prune_columns
# takes the dataset id as its first argument; confirm against dataset.prune_columns.
apple_train, apple_validate, apple_test = [
    dataset.prune_columns(4, split)
    for split in (apple_train, apple_validate, apple_test)
]

In [6]:
# Rename labels for consistency
def rename_sentiment(sentiment):
    """Translate a dataset-specific sentiment value into the shared label name.

    Recognized raw values and their canonical labels:
      "5" / "positive"  -> "Positive"
      "3" / "neutral"   -> "Neutral"
      "1" / "negative"  -> "Negative"
      "not_relevant"    -> "Irrelevant"

    Matching is exact (case-sensitive, strings only); any other value is
    returned unchanged.
    """
    canonical_labels = (
        (("5", "positive"), "Positive"),
        (("3", "neutral"), "Neutral"),
        (("1", "negative"), "Negative"),
        (("not_relevant",), "Irrelevant"),
    )
    for raw_values, label in canonical_labels:
        if sentiment in raw_values:
            return label
    # Unknown or already-canonical values pass through untouched.
    return sentiment


In [7]:
# Map the airline dataset's raw sentiment values onto the shared label names.
# Series.map applies rename_sentiment element-wise and keeps each frame's own
# index, so no intermediate DataFrame or re-indexing step is needed.
for airline_split in (airline_train, airline_validate, airline_test):
    airline_split['sentiment'] = airline_split['sentiment'].map(rename_sentiment)

In [8]:
# Map the Apple dataset's raw sentiment values onto the shared label names.
# Series.map applies rename_sentiment element-wise and keeps each frame's own
# index, so no intermediate DataFrame or re-indexing step is needed.
for apple_split in (apple_train, apple_validate, apple_test):
    apple_split['sentiment'] = apple_split['sentiment'].map(rename_sentiment)

In [9]:
# Stack the three sources row-wise into one combined frame per split
# (row-wise is pd.concat's default axis, so it is left implicit).
train_ds = pd.concat([twitter_train, airline_train, apple_train])
validate_ds = pd.concat([twitter_validate, airline_validate, apple_validate])
test_ds = pd.concat([twitter_test, airline_test, apple_test])

In [10]:
# Distinct sentiment labels present in the combined training frame.
# NOTE(review): this name is not referenced again in the visible cells.
twitter_sentiment_labels = train_ds['sentiment'].unique()

In [11]:
# NOTE: DataFrame.size is the total cell count (rows x columns), not the number
# of examples; use len(frame) or frame.shape when row counts are wanted.
train_ds.size, validate_ds.size, test_ds.size

(402588, 17112, 145548)

In [12]:
%%time
# Run the full experiment: train on the combined training frame using
# dataset.bert_tweet_phi as the feature function and
# dataset.fit_softmax_classifier as the model fitter, assessing on validate_ds.
# NOTE(review): vectorize=False presumably means phi already returns dense
# vectors, so no feature-dict vectorizer is applied -- confirm in sst.experiment.
bertweet_experiment_full = sst.experiment(
    [train_ds], # training frame(s), wrapped in a list
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[validate_ds],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.593     0.282     0.382       181
    Negative      0.759     0.892     0.820      1294
     Neutral      0.696     0.578     0.632       820
    Positive      0.635     0.645     0.640       557

    accuracy                          0.715      2852
   macro avg      0.671     0.599     0.618      2852
weighted avg      0.706     0.715     0.703      2852

CPU times: user 15h 51min 19s, sys: 45min 38s, total: 16h 36min 57s
Wall time: 5h 45min 11s


In [13]:
# List the entries available in the experiment result dictionary.
bertweet_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [14]:
# Assessment score(s) -- presumably one entry per assessment frame (one here).
bertweet_experiment_full['scores']

[0.6184287270912014]

In [15]:
# Name of the metric recorded alongside bertweet_experiment_full['scores'].
bertweet_experiment_full['metric']

'safe_macro_f1'

In [16]:
# Show the fitted estimator together with its non-default hyperparameters.
bertweet_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

# Test BERTweet (BERT trained on Tweets) on the test set

In [17]:
def predict_one_bert(text):
    """Predict the sentiment label for a single text.

    The text is featurized with the experiment's stored feature function
    (`bertweet_experiment_full['phi']`) and classified by the fitted model
    (`bertweet_experiment_full['model']`).

    Parameters
    ----------
    text
        The single example to classify, in whatever form the stored `phi`
        accepts (presumably a raw tweet string -- confirm).

    Returns
    -------
    The model's prediction for that one example (a single label, not a list).
    """
    features = bertweet_experiment_full['phi'](text)
    classifier = bertweet_experiment_full['model']
    # `predict` works on a batch, so wrap the lone example in a list and
    # unwrap the single prediction that comes back.
    return classifier.predict([features])[0]

In [18]:
# %%time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [19]:
# import importlib
# importlib.reload(sst)

In [None]:
%%time
# Run sst.evaluate with the fitted model and the experiment's stored feature
# function against the combined test frame.
# NOTE(review): a vectorizer is forwarded even though vectorize=False --
# confirm sst.evaluate ignores it in that mode (it is presumably None when
# the experiment itself ran with vectorize=False).
bert_test = sst.evaluate(
    bertweet_experiment_full['model'],
    bertweet_experiment_full['phi'],
    assess_dataframes=[test_ds],
    vectorizer=bertweet_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

In [None]:
# Check which container type holds the predictions for the first assessment frame.
type(bert_test['predictions'][0])

In [None]:
# Dump the raw test-set predictions to CSV for later inspection.
# `df` is kept as a name because a later cell builds predictions_df from it.
df = bert_test['predictions'][0]
predictions_fname = 'results/BERT_predictions_on_combined_twitter_test.csv'
pd.DataFrame(df).to_csv(predictions_fname)

In [None]:
# Dump the encoded test data from the evaluation result to CSV.
# NOTE(review): entries of 'assess_datasets' are indexed with ['vectorizer']
# in the evaluation cell, so this is presumably a dict -- confirm that
# pd.DataFrame turns it into the intended table.
encoded_test = bert_test['assess_datasets'][0]
encoding_fname = 'results/BERT_encodings_on_combined_twitter_test.csv'
pd.DataFrame(encoded_test).to_csv(encoding_fname)

In [None]:
# Wrap the predictions in a frame that shares the test frame's index so the
# two can be lined up row-for-row.
predictions_df = pd.DataFrame(df).set_index(test_ds.index)
predictions_df

In [None]:
# Attach the predictions to the test frame as a new column (mutates test_ds in place).
# NOTE(review): assigning a DataFrame to a single column presumably relies on
# predictions_df having exactly one column -- confirm.
test_ds['BERTweet_sentiment'] = predictions_df

In [None]:
# Display the test frame, now including the BERTweet_sentiment column.
test_ds

In [None]:
# Save the test frame together with its predicted labels.
# NOTE: to_csv also writes the index as the first column by default.
test_predictions_fname ='results/BERTweet_predictions_added_to_combined_twitter_test.csv'
test_ds.to_csv(test_predictions_fname)

In [None]:
# Test rows whose predicted label matches the gold sentiment label.
correct = test_ds.loc[test_ds['sentiment'].eq(test_ds['BERTweet_sentiment'])]

In [None]:
# Display the correctly classified test rows.
correct

In [None]:
# Test rows whose predicted label differs from the gold sentiment label.
incorrect = test_ds.loc[test_ds['sentiment'].ne(test_ds['BERTweet_sentiment'])]

In [None]:
# Display the misclassified test rows.
incorrect

In [None]:
# Test rows whose gold label is 'Irrelevant'.
irrelevant = test_ds.loc[test_ds['sentiment'].eq('Irrelevant')]
irrelevant

# Save Model

In [None]:
import pickle
# Persist the fitted classifier. The context manager guarantees the file is
# flushed and closed even if pickling fails part-way (the bare open() call
# previously left the handle unclosed).
# NOTE: only unpickle this file from a trusted location -- pickle.load can
# execute arbitrary code.
model_fname = 'models/BERTweet_twitter_model_combined.sav'
with open(model_fname, 'wb') as model_file:
    pickle.dump(bertweet_experiment_full['model'], model_file)

In [None]:
# List the entries of the evaluation result before pickling it.
bert_test.keys()

In [None]:
# Persist the whole evaluation result dictionary. The context manager
# guarantees the file is flushed and closed even if pickling fails part-way
# (the bare open() call previously left the handle unclosed).
test_fname = 'results/BERTweet_test_combined.sav'
with open(test_fname, 'wb') as test_file:
    pickle.dump(bert_test, test_file)