# Use BERT Representations with a LogisticRegression Classifier (`fit_softmax_classifier`)

In [1]:
from collections import Counter
import os
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from transformers import BertTokenizer, BertModel, BertForSequenceClassification


import dataset
import vsm
import sst

In [2]:
# Integer codes used to select a dataset. TWITTER is the one handed to
# `dataset.dataset_reader` in the next cell; the other two are not referenced
# in the part of the notebook shown here.
# NOTE(review): presumably these match ids defined in the `dataset` module — confirm.
TWITTER = 2
TWITTER_AIRLINES = 3
TWITTER_APPLE = 4

In [3]:
# Read the train / validation / test splits for the TWITTER dataset.
twitter_train, twitter_validate, twitter_test = dataset.dataset_reader(TWITTER)

# Pass every split through `dataset.prune_columns` with the same first argument.
# NOTE(review): the literal 2 equals TWITTER — presumably it is the dataset id
# and should be spelled `TWITTER`; confirm against `dataset.prune_columns`.
twitter_train, twitter_validate, twitter_test = [
    dataset.prune_columns(2, split)
    for split in (twitter_train, twitter_validate, twitter_test)
]

In [4]:
# Load the pretrained tokenizer and encoder for the cased BERT-base checkpoint.
# NOTE(review): neither `bert_tokenizer` nor `bert_model` is passed to the
# experiment cells below; presumably `dataset.hf_cls_phi` loads its own copies —
# confirm, and drop these if they are unused.
bert_weights_name = 'bert-base-cased'
bert_tokenizer = BertTokenizer.from_pretrained(bert_weights_name)
bert_model = BertModel.from_pretrained(bert_weights_name)
# Alternative head (unused, kept for reference):
# model = BertForSequenceClassification.from_pretrained(bert_weights_name)
# Distinct values of the `sentiment` column in the training split.
twitter_sentiment_labels = twitter_train['sentiment'].unique()

In [5]:
twitter_train.size, twitter_validate.size

(156831, 3000)

In [6]:
%%time
# Smallest run of the training-size sweep: fit on the first 1,500 training rows
# and assess on the first 1,000 validation rows. The classification report
# printed below the cell comes from this call.
# NOTE(review): vectorize=False presumably because `hf_cls_phi` already returns
# dense feature vectors — confirm against `sst.experiment`.
bert_experiment1500 = sst.experiment(
    twitter_train[:1500], # first 1,500 training rows
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.348     0.267     0.303       172
    Negative      0.539     0.624     0.578       266
     Neutral      0.534     0.519     0.527       285
    Positive      0.558     0.570     0.564       277

    accuracy                          0.518      1000
   macro avg      0.495     0.495     0.493      1000
weighted avg      0.510     0.518     0.512      1000

CPU times: user 14min 43s, sys: 5.37 s, total: 14min 49s
Wall time: 2min 29s


In [7]:
%%time
# Training-size sweep, step 2: same feature function and classifier as the
# 1,500-row run, now fitted on the first 3,000 training rows and assessed on
# the first 1,000 validation rows.
bert_experiment3000 = sst.experiment(
    twitter_train[:3000], # first 3,000 training rows
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1000]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.480     0.273     0.348       172
    Negative      0.570     0.643     0.604       266
     Neutral      0.524     0.526     0.525       285
    Positive      0.535     0.610     0.570       277

    accuracy                          0.537      1000
   macro avg      0.527     0.513     0.512      1000
weighted avg      0.532     0.537     0.528      1000

CPU times: user 23min 5s, sys: 7.21 s, total: 23min 13s
Wall time: 3min 55s


In [8]:
%%time
# Training-size sweep, step 3: fit on the first 6,000 training rows.
# The report's support totals 1,000, so the `[:1500]` slice below selects the
# whole validation split; the assessment data is the same as in the smaller runs.
bert_experiment6000 = sst.experiment(
    twitter_train[:6000], # first 6,000 training rows
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:1500]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.489     0.267     0.346       172
    Negative      0.585     0.711     0.642       266
     Neutral      0.568     0.512     0.539       285
    Positive      0.564     0.664     0.610       277

    accuracy                          0.565      1000
   macro avg      0.552     0.539     0.534      1000
weighted avg      0.558     0.565     0.553      1000

CPU times: user 39min 40s, sys: 13.1 s, total: 39min 53s
Wall time: 6min 46s


In [9]:
%%time
# Training-size sweep, step 4: fit on the first 12,000 training rows.
# The report's support totals 1,000, so the `[:2000]` slice below selects the
# whole validation split; the assessment data is the same as in the smaller runs.
bert_experiment12000 = sst.experiment(
    twitter_train[:12000], # first 12,000 training rows
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate[:2000]],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.571     0.256     0.353       172
    Negative      0.590     0.726     0.651       266
     Neutral      0.582     0.537     0.558       285
    Positive      0.577     0.693     0.630       277

    accuracy                          0.582      1000
   macro avg      0.580     0.553     0.548      1000
weighted avg      0.581     0.582     0.567      1000

CPU times: user 1h 13min 9s, sys: 24 s, total: 1h 13min 33s
Wall time: 12min 34s


In [10]:
%%time
# Final run of the sweep: fit on the entire training split and assess on the
# entire validation split. The returned dict (model, phi, datasets, predictions,
# scores) is what the later test-set evaluation and model-saving cells read.
bert_experiment_full = sst.experiment(
    twitter_train, # full training split
    dataset.hf_cls_phi,
    dataset.fit_softmax_classifier,
    assess_dataframes=[twitter_validate],
    vectorize=False)

              precision    recall  f1-score   support

  Irrelevant      0.570     0.262     0.359       172
    Negative      0.584     0.771     0.665       266
     Neutral      0.602     0.547     0.574       285
    Positive      0.595     0.668     0.629       277

    accuracy                          0.591      1000
   macro avg      0.588     0.562     0.556      1000
weighted avg      0.590     0.591     0.576      1000

CPU times: user 5h 9min 13s, sys: 1min 55s, total: 5h 11min 9s
Wall time: 53min 34s


In [11]:
bert_experiment_full.keys()

dict_keys(['model', 'phi', 'train_dataset', 'assess_datasets', 'predictions', 'metric', 'scores'])

In [12]:
bert_experiment_full['scores']

[0.5564631305265428]

In [13]:
bert_experiment_full['metric']

'safe_macro_f1'

In [14]:
bert_experiment_full['model']

LogisticRegression(multi_class='ovr', solver='liblinear')

# Evaluate the Classifier Trained on Tweet BERT Representations on the Test Set

In [15]:
def predict_one_bert(text):
    """Predict the label of a single example with the full-data experiment.

    The example is featurized with the `phi` stored in `bert_experiment_full`
    and the result is handed, as a batch of one, to the experiment's fitted
    model.

    Parameters
    ----------
    text
        The example to classify. NOTE(review): presumably a raw tweet string,
        i.e. whatever `dataset.hf_cls_phi` accepts — confirm.

    Returns
    -------
    The first (and only) element of the model's prediction for the batch.
    """
    featurize = bert_experiment_full['phi']
    # `predict` operates on a batch, so wrap the lone feature representation
    # in a one-element list.
    batch = [featurize(text)]
    classifier = bert_experiment_full['model']
    return classifier.predict(batch)[0]

In [16]:
# %% time
# twitter_test['prediction'] = twitter_test['text'].apply(predict_one_bert)

In [17]:
# Re-execute the already-imported `sst` module so that edits made to sst.py
# since the first import take effect without restarting the kernel.
# NOTE(review): on a fresh Restart & Run All this reload is a no-op; consider
# `%autoreload` in the setup cell instead.
import importlib
importlib.reload(sst)

<module 'sst' from '/mnt/c/Users/echya/Documents/XCS224U - 007 Natural Language Understanding/CS224-final-project/sst.py'>

In [18]:
%%time
# Assess the classifier fitted on the full training split against the test
# split, reusing the experiment's model and feature function. The
# classification report printed below the cell comes from this call.
bert_test = sst.evaluate(
    bert_experiment_full['model'],
    bert_experiment_full['phi'],
    assess_dataframes=[twitter_test],
    # Vectorizer stored with the experiment's first assessment dataset.
    # NOTE(review): with vectorize=False this is presumably None/unused — confirm.
    vectorizer=bert_experiment_full['assess_datasets'][0]['vectorizer'],
    vectorize=False
)

              precision    recall  f1-score   support

  Irrelevant      0.457     0.254     0.326      3943
    Negative      0.591     0.721     0.650      6682
     Neutral      0.554     0.515     0.534      5514
    Positive      0.572     0.634     0.601      6266

    accuracy                          0.564     22405
   macro avg      0.543     0.531     0.528     22405
weighted avg      0.553     0.564     0.551     22405

CPU times: user 2h 5min 50s, sys: 43.3 s, total: 2h 6min 34s
Wall time: 21min 8s


In [19]:
type(bert_test['predictions'][0])

numpy.ndarray

In [20]:
# Save the test-set predictions to CSV.
predictions_fname ='results/BERT_predictions_on_twitter_test.csv'
# Create the output directory up front so `to_csv` does not fail on a fresh
# checkout where `results/` does not exist yet.
os.makedirs(os.path.dirname(predictions_fname), exist_ok=True)
# NOTE: despite its name, `df` is the NumPy array of predictions for the first
# (only) assessment dataset, not a DataFrame. The name is kept because later
# cells read it.
df = bert_test['predictions'][0]
pd.DataFrame(df).to_csv(predictions_fname)

In [21]:
# Save the encoded (featurized) test examples to CSV.
encoding_fname ='results/BERT_encodings_on_twitter_test.csv'
encoded_test = bert_test['assess_datasets'][0]
# Write the encodings themselves. The previous version wrote the predictions
# array (`df`) here, so this file was a duplicate of the predictions CSV and
# `encoded_test` was never used.
# NOTE(review): assumes the assessment-dataset dict keeps its feature matrix
# under 'X' (one row of features per example), as `sst.build_dataset` does in
# the upstream cs224u code — confirm against this project's sst.py.
pd.DataFrame(encoded_test['X']).to_csv(encoding_fname)

In [22]:
# Wrap the prediction array in a one-column frame and give it the test split's
# index so the two can be lined up by index label.
predictions_df = pd.DataFrame(df).set_index(twitter_test.index)
predictions_df

Unnamed: 0,0
1,Neutral
3,Irrelevant
5,Negative
15,Neutral
18,Positive
...,...
74670,Positive
74672,Neutral
74674,Irrelevant
74676,Negative


In [23]:
# Attach the predictions to the test split as a new column. `predictions_df`
# has a single column and carries `twitter_test`'s index, so values line up by
# index label. Note that this mutates `twitter_test` in place.
twitter_test['BERT_sentiment'] = predictions_df

In [24]:
twitter_test

Unnamed: 0,text,sentiment,entity,BERT_sentiment
1,I am coming to the borders and I will kill you...,Positive,2401,Neutral
3,im coming on borderlands and i will murder you...,Positive,2401,Irrelevant
5,im getting into borderlands and i can murder y...,Positive,2401,Negative
15,"Rock-Hard La Vita, RARE BUT POWERFUL, HANDSOME...",Neutral,2403,Neutral
18,that was the first borderlands session in a lo...,Positive,2404,Positive
...,...,...,...,...
74670,Let no elim go unnoticed. . . . NVIDIA Highlig...,Positive,9199,Positive
74672,Let no elite go unnoticed... NVIDIA Highlights...,Positive,9199,Neutral
74674,Let a no information elim that go unnoticed......,Positive,9199,Irrelevant
74676,Just realized the windows partition of my Mac ...,Positive,9200,Negative


In [25]:
# Write the test split, including its `BERT_sentiment` prediction column, to CSV.
test_predictions_fname ='results/BERT_predictions_added_to_twitter_test.csv'
twitter_test.to_csv(test_predictions_fname)

In [26]:
# Test rows whose gold `sentiment` equals the BERT-based prediction.
correct = twitter_test.loc[twitter_test['sentiment'].eq(twitter_test['BERT_sentiment'])]

In [27]:
correct

Unnamed: 0,text,sentiment,entity,BERT_sentiment
15,"Rock-Hard La Vita, RARE BUT POWERFUL, HANDSOME...",Neutral,2403,Neutral
18,that was the first borderlands session in a lo...,Positive,2404,Positive
19,this was the first Borderlands session in a lo...,Positive,2404,Positive
25,The biggest disappointment of my life came a y...,Negative,2405,Negative
27,the biggest dissappoinment in my life coming o...,Negative,2405,Negative
...,...,...,...,...
74651,NVIDIA released its substantial update for its...,Neutral,9195,Neutral
74664,Nvidia really delayed the 3070 2 weeks .,Negative,9198,Negative
74668,Nvidia really only delayed the 3070 2 flight w...,Negative,9198,Negative
74669,Nvidia really delayed the next 2 weeks.,Negative,9198,Negative


In [28]:
# Test rows whose gold `sentiment` differs from the BERT-based prediction.
incorrect = twitter_test.loc[twitter_test['sentiment'].ne(twitter_test['BERT_sentiment'])]

In [29]:
incorrect

Unnamed: 0,text,sentiment,entity,BERT_sentiment
1,I am coming to the borders and I will kill you...,Positive,2401,Neutral
3,im coming on borderlands and i will murder you...,Positive,2401,Irrelevant
5,im getting into borderlands and i can murder y...,Positive,2401,Negative
37,Man Gearbox really needs to fix these disappoi...,Negative,2407,Positive
44,Watch this epic striptease!.,Neutral,2408,Positive
...,...,...,...,...
74667,Nvidia really delayed the 3070 several weeks.,Negative,9198,Positive
74672,Let no elite go unnoticed... NVIDIA Highlights...,Positive,9199,Neutral
74674,Let a no information elim that go unnoticed......,Positive,9199,Irrelevant
74676,Just realized the windows partition of my Mac ...,Positive,9200,Negative


In [30]:
# Test rows whose gold label is 'Irrelevant' (the class with the lowest recall
# in the test-set report), shown with their predictions for inspection.
irrelevant = twitter_test.loc[twitter_test['sentiment'].eq('Irrelevant')]
irrelevant

Unnamed: 0,text,sentiment,entity,BERT_sentiment
102,Appreciate the (sonic) concepts / praxis Valen...,Irrelevant,2418,Neutral
104,Evaluate the (sound) concepts / concepts of Va...,Irrelevant,2418,Positive
107,Appreciate the (sonic) conversations / actions...,Irrelevant,2418,Neutral
128,Love these new @ GhostLive cans!! Does anyone ...,Irrelevant,2422,Neutral
129,Loving these new @GhostLifestyle cans!! Anyone...,Irrelevant,2422,Irrelevant
...,...,...,...,...
73962,I know this asset they are referring to (it's ...,Irrelevant,9071,Negative
73963,I know this asset they're referring to (it's n...,Irrelevant,9071,Negative
73993,"Oh, yes, I used to work with these guys. Secon...",Irrelevant,9077,Positive
73997,"Oh, yeah, I used to work under these guys. Tot...",Irrelevant,9077,Positive


# Save Model

In [31]:
# Serialize the classifier fitted on the full training split.
import pickle
model_fname = 'models/BERT_twitter_model.sav'
# Make sure the target directory exists before opening the file for writing.
os.makedirs(os.path.dirname(model_fname), exist_ok=True)
# Use a context manager so the handle is flushed and closed deterministically;
# the previous inline `open(...)` was never closed explicitly.
with open(model_fname, 'wb') as model_file:
    pickle.dump(bert_experiment_full['model'], model_file)