# Use BERTweet Representations with LogisticRegression Softmax Classifier

In [1]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from transformers import AutoModel, AutoTokenizer 


import dataset
import vsm
import sst

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# bertweet = AutoModel.from_pretrained("vinai/bertweet-large")
# tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-large")

Downloading:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [5]:
# bertweet_base = AutoModel.from_pretrained("vinai/bertweet-base", use_fast=False)
# tokenizer_base = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)

AttributeError: 'NoneType' object has no attribute '__name__'

In [2]:
# Integer ids naming the available corpora. TWITTER_AIRLINES is the one this
# notebook hands to dataset.dataset_reader / dataset.prune_columns.
TWITTER, TWITTER_AIRLINES, TWITTER_APPLE = 2, 3, 4

In [3]:
# Read the airline corpus (dataset_reader yields the train / validate / test
# splits, in that order) and pass every split through dataset.prune_columns
# for the same corpus id.
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(TWITTER_AIRLINES, split)
    for split in dataset.dataset_reader(TWITTER_AIRLINES)
]

In [4]:
# bert_weights_name = 'bert-base-cased'
# bert_tokenizer = BertTokenizer.from_pretrained(bert_weights_name)
# bert_model = BertModel.from_pretrained(bert_weights_name)
# model = BertForSequenceClassification.from_pretrained(bert_weights_name)
# Distinct values present in the 'sentiment' column of the training split
# (in order of first appearance, as returned by Series.unique()).
twitter_sentiment_labels = twitter_train['sentiment'].unique()

In [5]:
# NOTE: DataFrame.size is the number of cells (rows x columns), not the number
# of rows; use len(frame) or frame.shape[0] when a row count is what you need.
tuple(split.size for split in (twitter_train, twitter_validate, twitter_test))

(58560, 7320, 7320)

In [6]:
%%time
bertweet_experiment1500 = sst.experiment(
    twitter_train[:1500], # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

    negative      0.786     0.938     0.855       612
     neutral      0.639     0.454     0.531       207
    positive      0.724     0.492     0.586       181

    accuracy                          0.757      1000
   macro avg      0.716     0.628     0.657      1000
weighted avg      0.745     0.757     0.739      1000

CPU times: user 47min 7s, sys: 4.73 s, total: 47min 11s
Wall time: 11min 58s


In [7]:
%%time
bertweet_experiment3000 = sst.experiment(
    twitter_train[:3000], 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

    negative      0.805     0.922     0.859       612
     neutral      0.639     0.488     0.553       207
    positive      0.730     0.569     0.640       181

    accuracy                          0.768      1000
   macro avg      0.725     0.660     0.684      1000
weighted avg      0.757     0.768     0.756      1000

CPU times: user 1h 17min 17s, sys: 7.78 s, total: 1h 17min 25s
Wall time: 19min 38s


In [8]:
%%time
bertweet_experiment6000 = sst.experiment(
    twitter_train[:6000], # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1500]],
    vectorize=False)

              precision    recall  f1-score   support

    negative      0.847     0.930     0.887       895
     neutral      0.703     0.599     0.647       312
    positive      0.796     0.669     0.727       257

    accuracy                          0.814      1464
   macro avg      0.782     0.733     0.754      1464
weighted avg      0.808     0.814     0.808      1464

CPU times: user 2h 16min 36s, sys: 20.6 s, total: 2h 16min 57s
Wall time: 34min 46s


In [9]:
%%time
bertweet_experiment_full = sst.experiment(
    twitter_train, # 
    dataset.bert_tweet_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate],
    vectorize=False)

              precision    recall  f1-score   support

    negative      0.850     0.935     0.890       895
     neutral      0.708     0.574     0.634       312
    positive      0.788     0.693     0.737       257

    accuracy                          0.816      1464
   macro avg      0.782     0.734     0.754      1464
weighted avg      0.809     0.816     0.809      1464

CPU times: user 3h 54min 32s, sys: 25.7 s, total: 3h 54min 58s
Wall time: 59min 42s


In [10]:
# List the entries available in the dictionary returned by sst.experiment.
bertweet_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [11]:
# Score list recorded by the experiment; presumably one entry per assessment
# dataframe (a single validation frame was passed) -- confirm in sst.experiment.
bertweet_experiment_full['scores']

[0.7537046306356107]

In [12]:
# Name of the scoring metric stored with the experiment (presumably the one
# used to compute the 'scores' entry -- confirm in sst.experiment).
bertweet_experiment_full['metric']

'safe_macro_f1'

In [13]:
# Show the classifier object stored in the experiment result; this is the
# object later handed to sst.evaluate.
bertweet_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

----

# Evaluate the BERTweet classifier (trained on airline tweets) on the test set

In [14]:
# Re-execute the already-imported sst module so that edits made to sst.py since
# the first import take effect without restarting the kernel.
# NOTE(review): this is only needed during interactive development; on a fresh
# "Restart & Run All" the top-level import already loads the current file.
import importlib
importlib.reload(sst)

<module 'sst' from '/home/vicaran93/repos/CS224-final-project/sst.py'>

In [15]:
%%time
# Score the classifier from the full-data experiment on the held-out test
# split, reusing that experiment's feature function ('phi') and the
# 'vectorizer' entry recorded for its first assessment dataset.
# NOTE(review): with vectorize=False the stored vectorizer may simply be
# None -- confirm against sst.evaluate / sst.experiment.
bertweet_test = sst.evaluate(
    bertweet_experiment_full['model'],
    bertweet_experiment_full['phi'],
    assess_dataframes=[twitter_test],
    vectorizer=bertweet_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

              precision    recall  f1-score   support

    negative      0.847     0.937     0.890       911
     neutral      0.720     0.566     0.634       332
    positive      0.785     0.692     0.736       221

    accuracy                          0.816      1464
   macro avg      0.784     0.732     0.753      1464
weighted avg      0.809     0.816     0.809      1464

CPU times: user 25min 54s, sys: 2.44 s, total: 25min 57s
Wall time: 6min 33s


In [16]:
type(bertweet_test['predictions'][0])

numpy.ndarray

In [17]:
# Persist the test-set predictions as CSV.
predictions_fname ='results/BERTweet_predictions_on_twitter_test_airline.csv'
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists; otherwise a fresh checkout fails only after the long
# evaluation run has finished.
os.makedirs(os.path.dirname(predictions_fname), exist_ok=True)
# Predictions for the first (and only) assessment dataframe, i.e. the test
# split. The name `df` is kept because a later cell reads it.
df = bertweet_test['predictions'][0]
# The CSV is written with a default 0..n-1 index, not the tweets' own index.
pd.DataFrame(df).to_csv(predictions_fname)

In [18]:
# Persist the encoded test dataset produced during evaluation as CSV.
encoding_fname ='results/BERTweet_encodings_on_twitter_test_airline.csv'
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs(os.path.dirname(encoding_fname), exist_ok=True)
# Dataset entry recorded for the first (and only) assessment dataframe.
# NOTE(review): the exact contents of this entry are defined in sst.py; the
# conversion to a DataFrame below is kept exactly as in the original cell.
encoded_test = bertweet_test['assess_datasets'][0]
pd.DataFrame(encoded_test).to_csv(encoding_fname)

In [19]:
# Wrap the predicted labels in a one-column frame and re-key it with the test
# split's index, so each prediction carries the index label of its tweet.
predictions_df = pd.DataFrame(df).set_index(twitter_test.index)
predictions_df

Unnamed: 0,0
5,negative
18,positive
24,negative
48,positive
52,neutral
...,...
14595,neutral
14607,negative
14614,negative
14620,negative


In [20]:
# Attach the predictions to the test split as a new column. predictions_df has
# a single column, selected explicitly here; rows are matched on the index.
# NOTE: this mutates twitter_test in place.
twitter_test['BERTweet_sentiment'] = predictions_df.iloc[:, 0]

In [21]:
# Display the test split, now including the BERTweet_sentiment column.
twitter_test

Unnamed: 0,dataset,tweet_id,text,sentiment,airline,BERTweet_sentiment
5,twitter_airline,570300767074181121,@VirginAmerica seriously would pay $30 a fligh...,negative,Virgin America,negative
18,twitter_airline,570270684619923457,I ❤️ flying @VirginAmerica. ☺️👍,positive,Virgin America,positive
24,twitter_airline,570256553502068736,@VirginAmerica you guys messed up my seating.....,negative,Virgin America,negative
48,twitter_airline,570010539499393025,@VirginAmerica @ladygaga @carrieunderwood Afte...,neutral,Virgin America,positive
52,twitter_airline,570004391731847169,@VirginAmerica wish you flew out of Atlanta......,neutral,Virgin America,neutral
...,...,...,...,...,...,...
14595,twitter_airline,569593694963310593,@AmericanAir @ShannonBloom Where's my DM? Wher...,negative,American,neutral
14607,twitter_airline,569592270866878464,@AmericanAir i need someone to help me out,neutral,American,negative
14614,twitter_airline,569591540944756737,@AmericanAir I need to be at work tomorrow at ...,negative,American,negative
14620,twitter_airline,569590965880532993,@AmericanAir I wait 2+ hrs for CS to call me b...,negative,American,negative


In [22]:
# Persist the test split together with its BERTweet_sentiment column as CSV.
test_predictions_fname ='results/BERTweet_predictions_added_to_twitter_test_airline.csv'
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs(os.path.dirname(test_predictions_fname), exist_ok=True)
twitter_test.to_csv(test_predictions_fname)

In [23]:
# Test tweets whose predicted label equals the gold 'sentiment' label.
correct = twitter_test[twitter_test['sentiment'].eq(twitter_test['BERTweet_sentiment'])]

In [24]:
# Display the correctly classified test tweets.
correct

Unnamed: 0,dataset,tweet_id,text,sentiment,airline,BERTweet_sentiment
5,twitter_airline,570300767074181121,@VirginAmerica seriously would pay $30 a fligh...,negative,Virgin America,negative
18,twitter_airline,570270684619923457,I ❤️ flying @VirginAmerica. ☺️👍,positive,Virgin America,positive
24,twitter_airline,570256553502068736,@VirginAmerica you guys messed up my seating.....,negative,Virgin America,negative
52,twitter_airline,570004391731847169,@VirginAmerica wish you flew out of Atlanta......,neutral,Virgin America,neutral
70,twitter_airline,569961866224652288,@virginamerica Need to change reservation. Hav...,neutral,Virgin America,neutral
...,...,...,...,...,...,...
14553,twitter_airline,569600462661554177,"@AmericanAir @tennetexan Too bad there's only,...",negative,American,negative
14573,twitter_airline,569597220871282690,@AmericanAir You didn't respond to my DM. You ...,negative,American,negative
14588,twitter_airline,569595309279440896,@AmericanAir if business class if full but 1st...,neutral,American,neutral
14614,twitter_airline,569591540944756737,@AmericanAir I need to be at work tomorrow at ...,negative,American,negative


In [25]:
# Test tweets whose predicted label differs from the gold 'sentiment' label.
incorrect = twitter_test[twitter_test['sentiment'].ne(twitter_test['BERTweet_sentiment'])]

In [26]:
# Display the misclassified test tweets for error analysis.
incorrect

Unnamed: 0,dataset,tweet_id,text,sentiment,airline,BERTweet_sentiment
48,twitter_airline,570010539499393025,@VirginAmerica @ladygaga @carrieunderwood Afte...,neutral,Virgin America,positive
56,twitter_airline,569996245462159361,@VirginAmerica you know it. Need it on my spot...,positive,Virgin America,neutral
86,twitter_airline,569923394990419968,@VirginAmerica Can't bring up my reservation o...,neutral,Virgin America,negative
237,twitter_airline,569198782421663745,@VirginAmerica is todays flight from Palm Spri...,negative,Virgin America,neutral
293,twitter_airline,568840560347373569,@VirginAmerica would love to do more for virgi...,positive,Virgin America,neutral
...,...,...,...,...,...,...
14439,twitter_airline,569616120866476032,@AmericanAir so how do we get me a person?,neutral,American,negative
14545,twitter_airline,569601337882755073,@AmericanAir yes and I would like a refund.,neutral,American,negative
14595,twitter_airline,569593694963310593,@AmericanAir @ShannonBloom Where's my DM? Wher...,negative,American,neutral
14607,twitter_airline,569592270866878464,@AmericanAir i need someone to help me out,neutral,American,negative


# Save Model

In [27]:
import pickle

# Serialize the complete result dictionary of the full-data experiment (not
# only the classifier) so it can be reloaded without repeating the long run.
# NOTE: only unpickle this file from a trusted source; pickle.load can execute
# arbitrary code.
model_fname = 'models/BERTweet_twitter_airline.sav'
# open() does not create missing parent directories.
os.makedirs(os.path.dirname(model_fname), exist_ok=True)
# The context manager flushes and closes the file handle even if dumping fails;
# the original left the handle returned by open() unclosed.
with open(model_fname, 'wb') as model_file:
    pickle.dump(bertweet_experiment_full, model_file)