# Use BERTweet Representations with LogisticRegression Softmax Classifier

In [1]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
# from transformers import BertTokenizer, BertModel, BertForSequenceClassification


import dataset
import vsm
import sst

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Dataset identifiers; each one is passed to dataset.dataset_reader below to
# select which corpus to load.
TWITTER = 2
TWITTER_AIRLINES = 3
TWITTER_APPLE = 4

In [3]:
# Load the train/validate/test splits of the Twitter corpus and prune the
# columns of each split.
# NOTE(review): the literal 2 equals TWITTER; presumably prune_columns expects
# the same dataset id -- confirm, then pass the constant instead.
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(2, split)
    for split in dataset.dataset_reader(TWITTER)
]

In [4]:
# Load the train/validate/test splits of the airline corpus and prune the
# columns of each split.
# NOTE(review): the literal 3 equals TWITTER_AIRLINES; presumably prune_columns
# expects the same dataset id -- confirm, then pass the constant instead.
airline_train, airline_validate, airline_test = [
    dataset.prune_columns(3, split)
    for split in dataset.dataset_reader(TWITTER_AIRLINES)
]

In [5]:
# Load the train/validate/test splits of the Apple corpus and prune the
# columns of each split.
# NOTE(review): the literal 4 equals TWITTER_APPLE; presumably prune_columns
# expects the same dataset id -- confirm, then pass the constant instead.
apple_train, apple_validate, apple_test = [
    dataset.prune_columns(4, split)
    for split in dataset.dataset_reader(TWITTER_APPLE)
]

In [6]:
# Rename labels for consistency
def rename_sentiment(sentiment):
    """Map a corpus-specific sentiment label onto the shared label set.

    "5"/"positive" become "Positive", "1"/"negative" become "Negative", and
    "3"/"neutral" as well as "not_relevant"/"Irrelevant" become "Neutral".
    Any other value (including one that is already normalised) is returned
    unchanged.
    """
    if sentiment in ("5", "positive"):
        return "Positive"
    if sentiment in ("1", "negative"):
        return "Negative"
    # Irrelevant / not-relevant tweets are deliberately folded into Neutral.
    if sentiment in ("3", "neutral", "not_relevant", "Irrelevant"):
        return "Neutral"
    return sentiment


In [7]:
# Normalise the sentiment labels of every Twitter split in place.
# Series.map returns a Series that already carries the frame's own index, so
# there is no need to build and re-index a throwaway DataFrame per split.
for split in (twitter_train, twitter_validate, twitter_test):
    split['sentiment'] = split['sentiment'].map(rename_sentiment)

In [8]:
# Normalise the sentiment labels of every airline split in place.
# Series.map returns a Series that already carries the frame's own index, so
# there is no need to build and re-index a throwaway DataFrame per split.
for split in (airline_train, airline_validate, airline_test):
    split['sentiment'] = split['sentiment'].map(rename_sentiment)

In [9]:
# Normalise the sentiment labels of every Apple split in place.
# Series.map returns a Series that already carries the frame's own index, so
# there is no need to build and re-index a throwaway DataFrame per split.
for split in (apple_train, apple_validate, apple_test):
    split['sentiment'] = split['sentiment'].map(rename_sentiment)

In [10]:
# Stack the three corpora row-wise into one frame per split.
# Row labels are carried over from the source frames (no ignore_index), so a
# label may occur more than once in the combined frames.
train_ds = pd.concat(
    [twitter_train, airline_train, apple_train],
    axis=0,
)
validate_ds = pd.concat(
    [twitter_validate, airline_validate, apple_validate],
    axis=0,
)
test_ds = pd.concat(
    [twitter_test, airline_test, apple_test],
    axis=0,
)

In [11]:
# bert_weights_name = 'bert-base-cased'
# bert_tokenizer = BertTokenizer.from_pretrained(bert_weights_name)
# bert_model = BertModel.from_pretrained(bert_weights_name)
# model = BertForSequenceClassification.from_pretrained(bert_weights_name)
# Distinct sentiment labels present in the combined training frame.
# (Despite the "twitter_" prefix, train_ds spans all three corpora.)
twitter_sentiment_labels = train_ds['sentiment'].unique()

In [12]:
# NOTE: DataFrame.size is the number of cells (rows x columns), not the number
# of examples; use len(df) or df.shape[0] for row counts.
train_ds.size, validate_ds.size, test_ds.size

(402588, 17112, 145548)

In [13]:
%%time
# Fit the classifier on the combined training frame and assess it on the
# combined validation frame. This is a long-running cell: the recorded run
# took roughly 2h25m of wall time.
bert_experiment_full = sst.experiment(
    [train_ds], # training dataframe(s)
    # Feature function applied to each example (presumably BERTweet
    # representations, going by its name -- see the dataset module).
    dataset.bert_tweet_phi,
    # Function that fits the classifier on the featurized training data.
    dataset.fit_softmax_classifier,
    assess_dataframes=[validate_ds],
    # NOTE(review): presumably False because phi already yields dense vectors
    # rather than feature dicts -- confirm against sst.experiment.
    vectorize=False)

              precision    recall  f1-score   support

    Negative      0.777     0.872     0.822      1288
     Neutral      0.728     0.666     0.695      1005
    Positive      0.674     0.589     0.628       559

    accuracy                          0.744      2852
   macro avg      0.726     0.709     0.715      2852
weighted avg      0.740     0.744     0.739      2852

CPU times: user 13h 41min 55s, sys: 15min 34s, total: 13h 57min 30s
Wall time: 2h 25min 21s


In [14]:
# List the entries stored in the experiment result.
bert_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [15]:
# Score list of the experiment; a single assess dataframe was passed and a
# single score is reported.
bert_experiment_full['scores']

[0.7152320020307276]

In [16]:
# Name of the metric the scores above were computed with.
bert_experiment_full['metric']

'safe_macro_f1'

In [17]:
# Show the fitted estimator.
# NOTE(review): if the repr reports multi_class='ovr', the model is a set of
# one-vs-rest binary classifiers rather than a multinomial softmax.
bert_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

# Evaluate the BERTweet classifier (trained on tweets) on the test set

In [18]:
def predict_one_bert(text):
    """Predict the sentiment label of a single text.

    The text is featurized with the feature function stored in
    `bert_experiment_full['phi']` and classified with the fitted model in
    `bert_experiment_full['model']`.

    Parameters
    ----------
    text
        The example to classify, in whatever form the feature function
        accepts (presumably a raw tweet string).

    Returns
    -------
    The model's prediction for `text`, unwrapped from the one-element batch.
    """
    experiment = bert_experiment_full
    features = experiment['phi'](text)
    # `predict` works on batches, so wrap the single example in a list ...
    batch_predictions = experiment['model'].predict([features])
    # ... and hand back its only element rather than the singleton batch.
    return batch_predictions[0]

In [19]:
# %% time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [20]:
# import importlib
# importlib.reload(sst)

In [19]:
%%time
# Score the trained model on the combined test frame, reusing the experiment's
# feature function and the vectorizer stored with its first assess dataset.
# Long-running cell: the recorded run took about 48 minutes of wall time.
bert_test = sst.evaluate(
    bert_experiment_full['model'],
    bert_experiment_full['phi'],
    assess_dataframes=[test_ds],
    vectorizer=bert_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

              precision    recall  f1-score   support

    Negative      0.697     0.701     0.699      7820
     Neutral      0.636     0.685     0.660      9860
    Positive      0.631     0.552     0.589      6578

    accuracy                          0.654     24258
   macro avg      0.654     0.646     0.649     24258
weighted avg      0.654     0.654     0.653     24258

CPU times: user 4h 43min 39s, sys: 5min 23s, total: 4h 49min 2s
Wall time: 48min 22s


In [20]:
# Check the container type of the predictions for the (only) assess dataframe.
type(bert_test['predictions'][0])

numpy.ndarray

In [21]:
# Persist the raw test-set predictions as a CSV file.
predictions_fname ='results/BERTweet_predictions_on_combined_neutral_twitter_test.csv'
# NOTE: despite its name, `df` holds a numpy array of predictions, not a
# DataFrame. The name is kept because a later cell reads `df` again.
df = bert_test['predictions'][0]
# Wrapping the array gives it a default positional index, so the rows of this
# CSV are not labelled with the test_ds index.
pd.DataFrame(df).to_csv(predictions_fname)

In [22]:
# Persist the assess dataset produced by the test evaluation as a CSV file.
encoding_fname ='results/BERTweet_encodings_on_combined_neutral_twitter_test.csv'
# NOTE(review): the matching entry of the training experiment is indexed with
# ['vectorizer'] elsewhere, so this is presumably a dict with several entries,
# not only the encodings -- confirm that dumping all of it is intended.
encoded_test = bert_test['assess_datasets'][0]
pd.DataFrame(encoded_test).to_csv(encoding_fname)

In [23]:
# Wrap the predictions in a one-column frame labelled with the test-set row
# index, so that it lines up with `test_ds` row for row.
predictions_df = pd.DataFrame(df).set_index(test_ds.index)
predictions_df

Unnamed: 0,0
1,Neutral
7,Neutral
10,Positive
11,Neutral
16,Neutral
...,...
3819,Negative
3826,Neutral
3833,Neutral
3843,Negative


In [24]:
# Attach the predictions to the test frame as a new column. predictions_df was
# given test_ds's own index, so the rows line up. Note that this mutates
# test_ds in place.
test_ds['BERTweet_sentiment'] = predictions_df

In [25]:
# Show the test frame, now including the BERTweet_sentiment column.
test_ds

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
1,twitter_sentiment,1,I am coming to the borders and I will kill you...,Positive,2401.0,,Neutral
7,twitter_sentiment,2,So I spent a couple of hours doing something f...,Positive,2402.0,,Neutral
10,twitter_sentiment,3,2010 So I spent a few hours making something f...,Positive,2402.0,,Positive
11,twitter_sentiment,4,was,Positive,2402.0,,Neutral
16,twitter_sentiment,5,"Live Rock - Hard music La la Varlope, RARE & t...",Neutral,2403.0,,Neutral
...,...,...,...,...,...,...,...
3819,twitter_apple,623499349,Fuck this is the third charger I've broke in l...,Negative,,,Negative
3826,twitter_apple,623499356,RT @iLoveMyMom98: I've tried turning it off an...,Negative,,,Neutral
3833,twitter_apple,623499363,APPLE Intraday Comments - LAST Update - PREMIU...,Neutral,,,Neutral
3843,twitter_apple,623499373,@iamrayuko @Apple because you know you don't w...,Neutral,,,Negative


In [28]:
# Save the test frame together with its BERTweet_sentiment prediction column.
test_predictions_fname ='results/BERTweet_predictions_added_to_combined_neutral_twitter_test.csv'
test_ds.to_csv(test_predictions_fname)

In [26]:
# Rows whose predicted label equals the gold label.
matches_gold = test_ds['sentiment'] == test_ds['BERTweet_sentiment']
correct = test_ds[matches_gold]

In [27]:
# Show the correctly classified test rows.
correct

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
10,twitter_sentiment,3,2010 So I spent a few hours making something f...,Positive,2402.0,,Positive
16,twitter_sentiment,5,"Live Rock - Hard music La la Varlope, RARE & t...",Neutral,2403.0,,Neutral
20,twitter_sentiment,6,that was the first borderlands session in a lo...,Positive,2404.0,,Positive
21,twitter_sentiment,7,that was the first borderlands session in a lo...,Positive,2404.0,,Positive
23,twitter_sentiment,8,that was the first borderlands session in a ho...,Positive,2404.0,,Positive
...,...,...,...,...,...,...,...
3813,twitter_apple,623499343,My phone went from 78% to 21% @apple please te...,Negative,,,Negative
3817,twitter_apple,623499347,Why Apple Watch May Be a Luxury Fashion Hit $...,Neutral,,,Neutral
3819,twitter_apple,623499349,Fuck this is the third charger I've broke in l...,Negative,,,Negative
3833,twitter_apple,623499363,APPLE Intraday Comments - LAST Update - PREMIU...,Neutral,,,Neutral


In [28]:
# Rows whose predicted label differs from the gold label.
differs_from_gold = test_ds['sentiment'] != test_ds['BERTweet_sentiment']
incorrect = test_ds[differs_from_gold]

In [29]:
# Show the misclassified test rows.
incorrect

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
1,twitter_sentiment,1,I am coming to the borders and I will kill you...,Positive,2401.0,,Neutral
7,twitter_sentiment,2,So I spent a couple of hours doing something f...,Positive,2402.0,,Neutral
11,twitter_sentiment,4,was,Positive,2402.0,,Neutral
65,twitter_sentiment,20,.. what,Neutral,2411.0,,Positive
74,twitter_sentiment,25,imma will probably play some border tps in a b...,Positive,2413.0,,Neutral
...,...,...,...,...,...,...,...
3744,twitter_apple,623499274,RT @iLoveMyMom98: I've tried turning it off an...,Negative,,,Neutral
3756,twitter_apple,623499286,The best @Apple #iPhone apps for 2014 http://t...,Neutral,,,Positive
3818,twitter_apple,623499348,RT @CNET: @Apple pioneer Bill Fernandez on @Go...,Neutral,,,Positive
3826,twitter_apple,623499356,RT @iLoveMyMom98: I've tried turning it off an...,Negative,,,Neutral


In [30]:
# Sanity check: rename_sentiment maps 'Irrelevant' to 'Neutral', so no test row
# should still carry the 'Irrelevant' label and this frame should be empty.
irrelevant = test_ds[test_ds['sentiment'] == 'Irrelevant']
irrelevant

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment


# Save Model

In [31]:
import pickle
# Serialise the fitted classifier so it can be reloaded without retraining.
model_fname = 'models/BERTweet_twitter_model_combined_neutral.sav'
# A context manager closes the file even if dump() raises; passing a bare
# open() to pickle.dump leaves the handle open and the data possibly unflushed.
with open(model_fname, 'wb') as model_file:
    pickle.dump(bert_experiment_full['model'], model_file)

In [32]:
# Serialise the complete test-evaluation result.
test_fname = 'results/BERTweet_test_combined_neutral.sav'
# A context manager closes the file even if dump() raises; passing a bare
# open() to pickle.dump leaves the handle open and the data possibly unflushed.
with open(test_fname, 'wb') as test_file:
    pickle.dump(bert_test, test_file)