# Use BERTweet Representations with a LogisticRegression Softmax Classifier

In [13]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score


import dataset
import vsm
import sst
import pickle

In [2]:
# Dataset identifiers passed to dataset.dataset_reader to select which corpus to load.
TWITTER = 2
TWITTER_AIRLINES = 3
TWITTER_APPLE = 4

In [3]:
# Load the train/validate/test splits of the Twitter corpus and prune each one.
# NOTE(review): prune_columns is given the same id as the reader (TWITTER == 2);
# confirm that its first argument is meant to be the dataset id.
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(TWITTER, split)
    for split in dataset.dataset_reader(TWITTER)
]

In [4]:
# Load the train/validate/test splits of the airline corpus and prune each one.
# NOTE(review): prune_columns is given the same id as the reader
# (TWITTER_AIRLINES == 3); confirm that its first argument is meant to be the dataset id.
airline_train, airline_validate, airline_test = [
    dataset.prune_columns(TWITTER_AIRLINES, split)
    for split in dataset.dataset_reader(TWITTER_AIRLINES)
]

In [5]:
# Load the train/validate/test splits of the Apple corpus and prune each one.
# NOTE(review): prune_columns is given the same id as the reader
# (TWITTER_APPLE == 4); confirm that its first argument is meant to be the dataset id.
apple_train, apple_validate, apple_test = [
    dataset.prune_columns(TWITTER_APPLE, split)
    for split in dataset.dataset_reader(TWITTER_APPLE)
]

In [6]:
# Rename labels for consistency
def rename_sentiment(sentiment):
    """Translate a corpus-specific sentiment label into the shared label set.

    Recognised aliases are mapped to "Positive", "Neutral", "Negative" or
    "Irrelevant"; any other value is handed back untouched.
    """
    alias_groups = (
        (("5", "positive"), "Positive"),
        (("3", "neutral"), "Neutral"),
        (("1", "negative"), "Negative"),
        (("not_relevant",), "Irrelevant"),
    )
    for aliases, canonical in alias_groups:
        if sentiment in aliases:
            return canonical
    # Unknown labels (including already-canonical ones) pass through as-is.
    return sentiment


In [7]:
# Normalise the airline labels to the shared label set.
# Series.map applies rename_sentiment element-wise and keeps the column's own
# index, so no intermediate DataFrame or index re-attachment is needed.
for split in (airline_train, airline_validate, airline_test):
    split['sentiment'] = split['sentiment'].map(rename_sentiment)

In [8]:
# Normalise the Apple labels to the shared label set.
# Series.map applies rename_sentiment element-wise and keeps the column's own
# index, so no intermediate DataFrame or index re-attachment is needed.
for split in (apple_train, apple_validate, apple_test):
    split['sentiment'] = split['sentiment'].map(rename_sentiment)

In [9]:
# Stack the three corpora row-wise, one combined frame per split.
# Original row labels are kept (no ignore_index), so labels may repeat across corpora.
train_ds, validate_ds, test_ds = (
    pd.concat(list(corpus_parts), axis=0)
    for corpus_parts in zip(
        (twitter_train, twitter_validate, twitter_test),
        (airline_train, airline_validate, airline_test),
        (apple_train, apple_validate, apple_test),
    )
)

In [10]:
# Distinct sentiment labels present in the combined training split
# (computed after the airline and Apple labels were renamed).
twitter_sentiment_labels = train_ds['sentiment'].unique()

In [14]:
# NOTE: DataFrame.size counts cells (rows x columns), not rows; use len(...) for row counts.
train_ds.size, validate_ds.size, test_ds.size

(402588, 17112, 145548)

In [17]:
%%time
# Fit the classifier on the combined training split and report scores on the
# combined validation split. This is the slow cell (hours of wall time).
bertweet_experiment_full = sst.experiment(
    [train_ds], # training dataframe(s)
    dataset.bert_tweet_phi,          # feature function applied to each example
    dataset.fit_softmax_classifier,  # model-fitting function
    assess_dataframes=[validate_ds],
    # NOTE(review): presumably tells sst that the feature function already
    # returns model-ready vectors - confirm against sst.experiment.
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.598     0.294     0.394       177
    Negative      0.754     0.899     0.820      1285
     Neutral      0.679     0.561     0.614       802
    Positive      0.661     0.641     0.651       588

    accuracy                          0.713      2852
   macro avg      0.673     0.599     0.620      2852
weighted avg      0.704     0.713     0.701      2852

CPU times: user 12h 23min 43s, sys: 12min 58s, total: 12h 36min 41s
Wall time: 2h 12min 44s


In [18]:
bertweet_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [19]:
bertweet_experiment_full['scores']

[0.6198544473498927]

In [20]:
bertweet_experiment_full['metric']

'safe_macro_f1'

In [21]:
bertweet_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

In [22]:
# Checkpoint the whole experiment dict so the multi-hour fit need not be repeated.
fname = 'temp_bertweet_experiment_full_combined.sav'
# The context manager closes (and flushes) the file even if pickling raises;
# a bare open() passed to pickle.dump leaves the handle open.
with open(fname, 'wb') as experiment_file:
    pickle.dump(bertweet_experiment_full, experiment_file)

# Evaluate the BERTweet-based classifier on the combined test set

In [None]:
def predict_one_bert(text):
    """Classify one text with the fitted BERTweet experiment.

    The text is featurized with the experiment's `phi`, wrapped in a
    one-element batch, and passed to the experiment's model. Returns the
    single predicted label rather than the one-element prediction array.
    """
    experiment = bertweet_experiment_full
    featurize = experiment['phi']
    # `predict` works on batches, so the lone example goes into a list.
    batch = [featurize(text)]
    classifier = experiment['model']
    return classifier.predict(batch)[0]

In [None]:
# %% time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [None]:
# import importlib
# importlib.reload(sst)

In [23]:
%%time
# Score the already-fitted classifier on the combined test split, reusing the
# model and feature function stored in the training experiment.
bert_test = sst.evaluate(
    bertweet_experiment_full['model'],
    bertweet_experiment_full['phi'],
    assess_dataframes=[test_ds],
    # Reuse the vectorizer recorded for the validation run.
    # NOTE(review): with vectorize=False this is presumably None - confirm against sst.
    vectorizer=bertweet_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

              precision    recall  f1-score   support

  Irrelevant      0.497     0.259     0.340      3871
    Negative      0.649     0.759     0.700      7905
     Neutral      0.556     0.529     0.543      5990
    Positive      0.581     0.654     0.616      6492

    accuracy                          0.594     24258
   macro avg      0.571     0.550     0.550     24258
weighted avg      0.584     0.594     0.581     24258

CPU times: user 4h 13min 56s, sys: 4min 25s, total: 4h 18min 21s
Wall time: 43min 12s


In [24]:
type(bert_test['predictions'][0])

numpy.ndarray

In [25]:
predictions_fname ='results/BERTweet_predictions_on_combined_twitter_test.csv'
# Create the output folder up front so a missing directory cannot fail the
# write after the long evaluation run.
os.makedirs(os.path.dirname(predictions_fname), exist_ok=True)
# NOTE: despite its name, `df` holds the raw predictions array (numpy.ndarray),
# not a DataFrame; the name is kept because a later cell reads it.
df = bert_test['predictions'][0]
pd.DataFrame(df).to_csv(predictions_fname)

In [26]:
# Dump the assessment dataset that sst.evaluate built for test_ds.
encoding_fname ='results/BERTweet_encodings_on_combined_twitter_test.csv'
# NOTE(review): presumably a dict holding the encoded features plus bookkeeping
# entries such as 'vectorizer' - confirm its structure before relying on the CSV layout.
encoded_test = bert_test['assess_datasets'][0]
pd.DataFrame(encoded_test).to_csv(encoding_fname)

In [27]:
# Wrap the predictions array in a frame labelled with the test split's row
# index, so each prediction carries the label of the row it belongs to.
predictions_df = pd.DataFrame(df, index=test_ds.index)
predictions_df

Unnamed: 0,0
2,Positive
3,Irrelevant
7,Positive
14,Neutral
17,Neutral
...,...
3836,Neutral
3846,Neutral
3858,Neutral
3870,Neutral


In [28]:
# Attach the predictions positionally. predictions_df carries test_ds's own
# index in the same row order, and the combined index may contain repeated
# labels (the corpora were concatenated without re-indexing), so a plain array
# avoids any label-based alignment.
test_ds['BERTweet_sentiment'] = predictions_df.iloc[:, 0].to_numpy()

In [29]:
test_ds

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
2,twitter_sentiment,1,im getting on borderlands and i will kill you ...,Positive,2401.0,,Positive
3,twitter_sentiment,2,im coming on borderlands and i will murder you...,Positive,2401.0,,Irrelevant
7,twitter_sentiment,3,So I spent a couple of hours doing something f...,Positive,2402.0,,Positive
14,twitter_sentiment,4,"Rock-Hard La Varlope, RARE & POWERFUL, HANDSOM...",Neutral,2403.0,,Neutral
17,twitter_sentiment,5,"I-Hard like me, RARE LONDON DE, HANDSOME 2011,...",Neutral,2403.0,,Neutral
...,...,...,...,...,...,...,...
3836,twitter_apple,623499366,@Apple recruiting luxury executives for iwatch...,Neutral,,,Neutral
3846,twitter_apple,623499376,RT @TeamCavuto: Protesters stage #DieIn protes...,Neutral,,,Neutral
3858,twitter_apple,623499388,Apple Is Warming Up To Social Media: Apple is ...,Neutral,,,Neutral
3870,twitter_apple,623499400,Apple Is Warming Up To Social Media: Apple is ...,Neutral,,,Neutral


In [30]:
# Persist the combined test split together with its BERTweet_sentiment column.
test_predictions_fname ='results/BERTweet_predictions_added_to_combined_twitter_test.csv'
test_ds.to_csv(test_predictions_fname)

In [31]:
# Rows where the BERTweet prediction agrees with the gold label.
is_match = test_ds['sentiment'].eq(test_ds['BERTweet_sentiment'])
correct = test_ds[is_match]

In [32]:
correct

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
2,twitter_sentiment,1,im getting on borderlands and i will kill you ...,Positive,2401.0,,Positive
7,twitter_sentiment,3,So I spent a couple of hours doing something f...,Positive,2402.0,,Positive
14,twitter_sentiment,4,"Rock-Hard La Varlope, RARE & POWERFUL, HANDSOM...",Neutral,2403.0,,Neutral
17,twitter_sentiment,5,"I-Hard like me, RARE LONDON DE, HANDSOME 2011,...",Neutral,2403.0,,Neutral
18,twitter_sentiment,6,that was the first borderlands session in a lo...,Positive,2404.0,,Positive
...,...,...,...,...,...,...,...
3836,twitter_apple,623499366,@Apple recruiting luxury executives for iwatch...,Neutral,,,Neutral
3846,twitter_apple,623499376,RT @TeamCavuto: Protesters stage #DieIn protes...,Neutral,,,Neutral
3858,twitter_apple,623499388,Apple Is Warming Up To Social Media: Apple is ...,Neutral,,,Neutral
3870,twitter_apple,623499400,Apple Is Warming Up To Social Media: Apple is ...,Neutral,,,Neutral


In [33]:
# Rows where the BERTweet prediction disagrees with the gold label.
is_mismatch = test_ds['sentiment'].ne(test_ds['BERTweet_sentiment'])
incorrect = test_ds[is_mismatch]

In [34]:
incorrect

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
3,twitter_sentiment,2,im coming on borderlands and i will murder you...,Positive,2401.0,,Irrelevant
73,twitter_sentiment,22,imma is probably playing a bit of borderland t...,Positive,2413.0,,Negative
86,twitter_sentiment,25,FUCK YESSSSSSSS.,Positive,2415.0,,Negative
89,twitter_sentiment,27,FUCK YOU.,Positive,2415.0,,Negative
92,twitter_sentiment,28,I hate that this easy mayhem modifier event on...,Neutral,2416.0,,Negative
...,...,...,...,...,...,...,...
3736,twitter_apple,623499266,Why Apple and Microsoft Should be Worried Abou...,Negative,,,Neutral
3737,twitter_apple,623499267,#AAPL:Apple (AAPL) Stock Is Falling Today Amid...,Negative,,,Neutral
3742,twitter_apple,623499272,"Apple Inc., GoPro Inc Products Are Go-To Gifts...",Positive,,,Neutral
3785,twitter_apple,623499315,#Apple releases iOS 8.1.2 with fixes for bugs ...,Positive,,,Neutral


In [35]:
# Test rows whose gold label is 'Irrelevant', shown with their predictions.
gold_is_irrelevant = test_ds['sentiment'].eq('Irrelevant')
irrelevant = test_ds[gold_is_irrelevant]
irrelevant

Unnamed: 0,dataset,tweet_id,text,sentiment,entity,airline,BERTweet_sentiment
104,twitter_sentiment,34,Evaluate the (sound) concepts / concepts of Va...,Irrelevant,2418.0,,Positive
105,twitter_sentiment,35,Appreciate the (sonic) concepts / praxis Valen...,Irrelevant,2418.0,,Positive
107,twitter_sentiment,36,Appreciate the (sonic) conversations / actions...,Irrelevant,2418.0,,Positive
128,twitter_sentiment,44,Love these new @ GhostLive cans!! Does anyone ...,Irrelevant,2422.0,,Positive
131,twitter_sentiment,45,do these new @GhostLifestyle<unk> They want dr...,Irrelevant,2422.0,,Negative
...,...,...,...,...,...,...,...
1773,twitter_apple,623497297,'Could Falling Oil Prices Spark A Financial Cr...,Irrelevant,,,Neutral
2572,twitter_apple,623498102,"#AAPL:After Decades Of Consolidation, Wall Str...",Irrelevant,,,Neutral
2971,twitter_apple,623498501,What is going on with #AAPL....SELL OR HOLD.,Irrelevant,,,Negative
3179,twitter_apple,623498709,Time is running out! #trading #stocks #futures...,Irrelevant,,,Neutral


# Save Model

In [36]:
# Persist only the fitted classifier (pickle and os are imported in the first cell).
model_fname = 'models/BERTweet_twitter_model_combined.sav'
# Make sure the target folder exists so the dump cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(model_fname), exist_ok=True)
# The context manager closes (and flushes) the file even if pickling raises.
with open(model_fname, 'wb') as model_file:
    pickle.dump(bertweet_experiment_full['model'], model_file)

In [37]:
bert_test.keys()

dict_keys(['model', 'phi', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [38]:
# Persist the full test-evaluation dict (model, phi, datasets, predictions, scores).
test_fname = 'results/BERTweet_test_combined.sav'
# The context manager closes (and flushes) the file even if pickling raises;
# a bare open() passed to pickle.dump leaves the handle open.
with open(test_fname, 'wb') as test_file:
    pickle.dump(bert_test, test_file)