# Use BERT Representations with LogisticRegression Softmax Classifier

In [1]:
from collections import Counter
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from transformers import BertTokenizer, BertModel, BertForSequenceClassification


import dataset
import vsm
import sst

In [2]:
# Dataset selector constants. TWITTER_APPLE is the one handed to
# dataset.dataset_reader / dataset.prune_columns in this notebook.
TWITTER, TWITTER_AIRLINES, TWITTER_APPLE = 2, 3, 4

In [3]:
# Read the three splits selected by TWITTER_APPLE, then pass each one
# through dataset.prune_columns with the same selector.
twitter_train, twitter_validate, twitter_test = dataset.dataset_reader(TWITTER_APPLE)
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(TWITTER_APPLE, split)
    for split in (twitter_train, twitter_validate, twitter_test)
]

In [None]:
# Rename labels for consistency
def rename_sentiment(sentiment):
    """Map a raw sentiment label onto a shared three-class naming.

    "5" / "positive" become "Positive", "3" / "neutral" become "Neutral",
    "1" / "negative" become "Negative", and "not_relevant" / "Irrelevant"
    are folded into "Neutral". Any other value is returned unchanged.
    """
    # Ordered (aliases, canonical label) pairs; the first match wins.
    aliases_by_label = (
        (("5", "positive"), "Positive"),
        (("3", "neutral"), "Neutral"),
        (("1", "negative"), "Negative"),
        (("not_relevant", "Irrelevant"), "Neutral"),
    )
    for aliases, canonical in aliases_by_label:
        if sentiment in aliases:
            return canonical
    return sentiment

In [None]:
# Normalise the 'sentiment' labels of every split in place.
# Series.map applies rename_sentiment element-wise and keeps each frame's own
# index, so no intermediate DataFrame / set_index round-trip is needed.
for split in (twitter_train, twitter_validate, twitter_test):
    split['sentiment'] = split['sentiment'].map(rename_sentiment)

In [4]:
# Disabled: direct Hugging Face BERT setup, kept for reference only.
# bert_weights_name = 'bert-base-cased'
# bert_tokenizer = BertTokenizer.from_pretrained(bert_weights_name)
# bert_model = BertModel.from_pretrained(bert_weights_name)
# model = BertForSequenceClassification.from_pretrained(bert_weights_name)
# Distinct values found in the training split's 'sentiment' column
twitter_sentiment_labels = twitter_train['sentiment'].unique()

In [5]:
# NOTE: DataFrame.size is rows * columns (total cells), not the number of rows.
tuple(split.size for split in (twitter_train, twitter_validate, twitter_test))

(9327, 1164, 1167)

In [6]:
%%time
# Quick run: sst.experiment on a 1500-row slice of the training frame, with
# dataset.hf_cls_phi and dataset.fit_softmax_classifier passed as the second
# and third arguments (presumably the feature and training functions).
bert_experiment1500 = sst.experiment(
    twitter_train[:1500], # first 1500 rows of the training frame
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    # NOTE(review): the report printed for this cell shows support 388, so the
    # [:1000] cap does not appear to trim the validation frame.
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

           1      0.727     0.611     0.664       131
           3      0.712     0.856     0.777       208
           5      0.667     0.409     0.507        44
not_relevant      0.000     0.000     0.000         5

    accuracy                          0.711       388
   macro avg      0.526     0.469     0.487       388
weighted avg      0.703     0.711     0.698       388

CPU times: user 27min 43s, sys: 30 s, total: 28min 13s
Wall time: 7min 1s


In [7]:
%%time
bert_experiment_full = sst.experiment(
    twitter_train, # 
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate],
    vectorize=False)

              precision    recall  f1-score   support

           1      0.737     0.641     0.686       131
           3      0.740     0.875     0.802       208
           5      0.667     0.409     0.507        44
not_relevant      0.000     0.000     0.000         5

    accuracy                          0.732       388
   macro avg      0.536     0.481     0.499       388
weighted avg      0.721     0.732     0.719       388

CPU times: user 45min 41s, sys: 45.7 s, total: 46min 27s
Wall time: 11min 34s


In [8]:
# List the fields of the dict returned by sst.experiment.
bert_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [9]:
# Scores stored by the experiment (one assess dataframe was passed in).
bert_experiment_full['scores']

[0.49862966344321435]

In [10]:
# Name of the metric recorded alongside the stored scores.
bert_experiment_full['metric']

'safe_macro_f1'

In [11]:
# The classifier object stored under the experiment's 'model' key.
bert_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

# Evaluate the classifier trained on tweet BERT features on the test set

In [12]:
def predict_one_bert(text):
    """Return the predicted label for a single piece of text.

    The text is passed through the 'phi' function stored in
    `bert_experiment_full`, and the experiment's 'model' then predicts on a
    one-item batch built from that representation.
    """
    experiment = bert_experiment_full
    representation = experiment['phi'](text)
    # `predict` works on a batch, so wrap the single example in a list ...
    batch_predictions = experiment['model'].predict([representation])
    # ... and hand back the lone prediction rather than the singleton batch.
    return batch_predictions[0]

In [13]:
# %% time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [14]:
# Re-import the sst module so that edits made to it are picked up without
# restarting the kernel.
import importlib
importlib.reload(sst)

<module 'sst' from '/mnt/c/Users/echya/Documents/XCS224U - 007 Natural Language Understanding/CS224-final-project/sst.py'>

In [15]:
%%time
# Score the model from bert_experiment_full on the test frame, reusing the
# 'phi' function stored in the same experiment dict.
bert_test = sst.evaluate(
    bert_experiment_full['model'],
    bert_experiment_full['phi'],
    assess_dataframes=[twitter_test],
    # NOTE(review): a vectorizer is supplied although vectorize=False --
    # confirm that sst.evaluate ignores it in that case.
    vectorizer=bert_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

              precision    recall  f1-score   support

           1      0.728     0.620     0.670       121
           3      0.730     0.885     0.800       217
           5      0.609     0.304     0.406        46
not_relevant      0.000     0.000     0.000         5

    accuracy                          0.722       389
   macro avg      0.517     0.452     0.469       389
weighted avg      0.706     0.722     0.703       389

CPU times: user 4min 56s, sys: 4.66 s, total: 5min 1s
Wall time: 1min 14s


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
# Check what kind of object holds the predictions for the first (only) assess dataframe.
type(bert_test['predictions'][0])

numpy.ndarray

In [17]:
# Write the test-set predictions to CSV.
predictions_fname ='results/BERT_predictions_on_twitter_test_apple.csv'
# Despite its name, `df` is the predictions array (a numpy.ndarray according
# to the type check in this notebook); later cells reuse it under this name.
df = bert_test['predictions'][0]
pd.DataFrame(df).to_csv(predictions_fname)

In [18]:
# Write the encoded test examples to CSV.
encoding_fname ='results/BERT_encodings_on_twitter_test_apple.csv'
encoded_test = bert_test['assess_datasets'][0]
# Fix: the original wrote `df` (the predictions) here, so the "encodings" file
# was a copy of the predictions file and `encoded_test` was never used.
# NOTE(review): assumes the assess-dataset dict keeps its feature matrix under
# the 'X' key (it is indexed with 'vectorizer' elsewhere in this notebook) --
# confirm against the dataset-building code in sst.py.
pd.DataFrame(encoded_test['X']).to_csv(encoding_fname)

In [19]:
# Predictions as a one-column frame that carries twitter_test's index, so the
# rows line up with the test frame.
predictions_df = pd.DataFrame(df).set_index(twitter_test.index)
predictions_df

Unnamed: 0,0
22,3
31,3
49,3
52,3
58,3
...,...
3845,3
3859,3
3868,3
3870,3


In [20]:
# Attach the predictions as a new column; predictions_df was given
# twitter_test's index when it was built.
twitter_test['BERT_sentiment'] = predictions_df

In [21]:
# Show the test frame together with its BERT_sentiment column.
twitter_test

Unnamed: 0,tweet_id,text,sentiment,BERT_sentiment
22,623495535,@robconeybeer: You need an IP portfolio to def...,3,3
31,623495544,@thehill @Apple i cite the 4th amendment as a ...,3,3
49,623495562,RT @thehill: Justice Department cites 18th cen...,3,3
52,623495565,This one chart explains @tim_cook's affect on ...,3,3
58,623495571,5 Companies Growing Faster Than Apple Inc. htt...,3,3
...,...,...,...,...
3845,623499375,media reports say that @Apple is hiring pros f...,3,3
3859,623499389,Apple Is Warming Up To Social Media: Apple is ...,3,3
3868,623499398,Apple Is Warming Up To Social Media: Apple is ...,5,3
3870,623499400,Apple Is Warming Up To Social Media: Apple is ...,3,3


In [22]:
# Export the test frame, including the BERT_sentiment column, to CSV.
test_predictions_fname ='results/BERT_predictions_added_to_twitter_test_apple.csv'
twitter_test.to_csv(test_predictions_fname)

In [23]:
# Rows whose 'BERT_sentiment' prediction equals the 'sentiment' label.
correct = twitter_test[twitter_test['sentiment'] == twitter_test['BERT_sentiment']]

In [24]:
# Show the correctly predicted rows.
correct

Unnamed: 0,tweet_id,text,sentiment,BERT_sentiment
22,623495535,@robconeybeer: You need an IP portfolio to def...,3,3
31,623495544,@thehill @Apple i cite the 4th amendment as a ...,3,3
49,623495562,RT @thehill: Justice Department cites 18th cen...,3,3
52,623495565,This one chart explains @tim_cook's affect on ...,3,3
58,623495571,5 Companies Growing Faster Than Apple Inc. htt...,3,3
...,...,...,...,...
3812,623499342,@Apple co-founder Steve Wozniak talks about St...,3,3
3836,623499366,@Apple recruiting luxury executives for iwatch...,3,3
3845,623499375,media reports say that @Apple is hiring pros f...,3,3
3859,623499389,Apple Is Warming Up To Social Media: Apple is ...,3,3


In [25]:
# Rows whose 'BERT_sentiment' prediction differs from the 'sentiment' label.
incorrect = twitter_test[twitter_test['sentiment'] != twitter_test['BERT_sentiment']]

In [26]:
# Show the mispredicted rows.
incorrect

Unnamed: 0,tweet_id,text,sentiment,BERT_sentiment
73,623495586,Buy round lot on the open MT @WSJD #AAPL stock...,1,3
80,623495593,I'm hoping @apple won't automatically make us ...,1,3
129,623495642,#GoPro: What Competition? Part 2 #GPRO #AAPL ...,not_relevant,3
144,623495657,@apple and @facebook I speak for all of humani...,1,3
214,623495727,Apple tumbles as much as 6 percent in unusual ...,1,3
...,...,...,...,...
3790,623499320,Five Apple predictions for 2015. I disagree ab...,5,3
3800,623499330,"Please, @Apple, don't go the way @Facebook has...",1,3
3809,623499339,BLOCK TRADE detected in #AAPL,1,3
3868,623499398,Apple Is Warming Up To Social Media: Apple is ...,5,3


In [27]:
# Test rows labelled 'not_relevant'.
# NOTE(review): rename_sentiment maps 'not_relevant' to 'Neutral', so once the
# label-renaming cell has been run this filter selects no rows.
irrelevant = twitter_test[twitter_test['sentiment'] == 'not_relevant']
irrelevant

Unnamed: 0,tweet_id,text,sentiment,BERT_sentiment
129,623495642,#GoPro: What Competition? Part 2 #GPRO #AAPL ...,not_relevant,3
1814,623497338,@kibs33 Dizzy-doings on Wall Street tomorrow? ...,not_relevant,3
2183,623497713,@Apple honey crisp apple for the win,not_relevant,3
2572,623498102,"#AAPL:After Decades Of Consolidation, Wall Str...",not_relevant,3
2586,623498116,#Ebay #Music @@Apple TV2 Gen Jailbroken Unteth...,not_relevant,3


In [28]:
import pickle

# Persist the whole experiment dict with pickle. The context manager closes
# the file handle deterministically; the original left the handle returned by
# open() unclosed.
model_fname = 'models/BERT_twitter_apple.sav'
with open(model_fname, 'wb') as model_file:
    pickle.dump(bert_experiment_full, model_file)

In [None]:
# Persist the test-set evaluation results with pickle (imported in the cell
# that saves the experiment). The context manager closes the file handle,
# which the original bare open() call never did.
test_fname = 'results/BERT_test_apple_neutral.sav'
with open(test_fname, 'wb') as test_file:
    pickle.dump(bert_test, test_file)