## Product Sentiment Data

Data (public domain): https://data.world/crowdflower/brands-and-product-emotions

Notebook code based on IMDB notebook from bert-sklearn/other_examples

In [3]:
import numpy as np
import pandas as pd
import os
import sys
import csv
import re
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from ftfy import fix_text
 
from bert_sklearn import BertClassifier
from bert_sklearn import load_model

print(os.getcwd())

DATAFILE = "./data/judge-cleaned-up.csv"

/Users/joep/Desktop/NashAnalytics2019/NAS2019


In [4]:
# Prep Data

def cleanup(txt):
    return fix_text(txt)
    
converters = {'tweet_text': cleanup}
    
raw_data = pd.read_csv(DATAFILE, converters=converters, encoding='unicode_escape')
raw_data.head(10)

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion
5,@teachntech00 New iPad Apps For #SpeechTherapy...,,No emotion toward brand or product
6,,,No emotion toward brand or product
7,"#SXSW is just starting, #CTIA is around the co...",Android,Positive emotion
8,Beautifully smart and simple idea RT @madebyma...,iPad or iPhone App,Positive emotion
9,Counting down the days to #sxsw plus strong Ca...,Apple,Positive emotion


In [5]:
## Transform columns
## ONLY RUN THIS CELL ONCE!!!

# Add columns to make the labels usable by the model
# tweet_text => text
# Positive / No emotion / Negative => 1, 0, -1
# Product: Apple stuff, Google stuff, NaN => Apple, Google, ''

def clean_text(txt):
    return txt
raw_data.insert(1, "text", np.vectorize(clean_text)(raw_data['tweet_text']))

def create_labels(sentiment):
    if sentiment.startswith('Positive'):
        return 1
    if sentiment.startswith('Negative'):
        return -1
    return 0
raw_data.insert(3, 'label', np.vectorize(create_labels)(raw_data['is_there_an_emotion_directed_at_a_brand_or_product']))

def get_company(product):
    if pd.isnull(product):
        return ''
    if 'iPad' in product or 'iPhone' in product or 'Apple' in product:
        return 'Apple'
    if 'Google' in product or 'Android' in product:
        return 'Google'
    return ''
raw_data.insert(2, 'company', np.vectorize(get_company)(raw_data['emotion_in_tweet_is_directed_at']))
raw_data.head(10)

Unnamed: 0,tweet_text,text,company,emotion_in_tweet_is_directed_at,label,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Apple,iPhone,-1,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,@jessedee Know about @fludapp ? Awesome iPad/i...,Apple,iPad or iPhone App,1,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,@swonderlin Can not wait for #iPad 2 also. The...,Apple,iPad,1,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,@sxsw I hope this year's festival isn't as cra...,Apple,iPad or iPhone App,-1,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Google,1,Positive emotion
5,@teachntech00 New iPad Apps For #SpeechTherapy...,@teachntech00 New iPad Apps For #SpeechTherapy...,,,0,No emotion toward brand or product
6,,,,,0,No emotion toward brand or product
7,"#SXSW is just starting, #CTIA is around the co...","#SXSW is just starting, #CTIA is around the co...",Google,Android,1,Positive emotion
8,Beautifully smart and simple idea RT @madebyma...,Beautifully smart and simple idea RT @madebyma...,Apple,iPad or iPhone App,1,Positive emotion
9,Counting down the days to #sxsw plus strong Ca...,Counting down the days to #sxsw plus strong Ca...,Apple,Apple,1,Positive emotion


In [6]:
# Last Data Preparation Step
# Clean up characters and pull out columns of interest

def clean(text):
    text = re.sub(r'<.*?>', '', text)
    text = re.sub(r"\"", "", text)       
    return text

data = raw_data.filter(['text', 'company', 'label'], axis=1)
data['text'] = data['text'].transform(clean)

In [8]:
# Split into training and test data

msk = np.random.rand(len(data)) < 0.8
train = data[msk]
test = data[~msk]
print('Training data size: ' + str(train.shape))
print('Test data size: ' + str(test.shape))

Training data size: (7274, 3)
Test data size: (1819, 3)


In [9]:
train[:1].values

array([['.@wesley83 I have a 3G iPhone. After 3 hrs tweeting at #RISE_Austin, it was dead!  I need to upgrade. Plugin stations at #SXSW.',
        'Apple', -1]], dtype=object)

As you can see, each review is much longer than a sentence or two. The Google AI BERT models were trained on sequences of max length 512. Lets look at the performance for max_seq_length equal to  128, 256, and 512.

### max_seq_length = 128

In [14]:
## Set up data for the classifier

train = train.sample(800)
test = test.sample(500)

print("Train data size: %d "%(len(train)))
print("Test data size: %d "%(len(test)))

X_train = train['text']
y_train = train['label']

X_test = test['text']
y_test = test['label']

Train data size: 800 
Test data size: 500 


In [15]:
## Create the model

model = BertClassifier(bert_model='bert-base-uncased', label_list=[-1,0,1])
model.max_seq_length = 128
model.learning_rate = 2e-05
model.epochs = 4

print(model)


Building sklearn text classifier...
BertClassifier(bert_config_json=None, bert_model='bert-base-uncased',
               bert_vocab=None, do_lower_case=None, epochs=4, eval_batch_size=8,
               fp16=False, from_tf=False, gradient_accumulation_steps=1,
               ignore_label=None, label_list=[-1, 0, 1], learning_rate=2e-05,
               local_rank=-1, logfile='bert_sklearn.log', loss_scale=0,
               max_seq_length=128, num_mlp_hiddens=500, num_mlp_layers=0,
               random_state=42, restore_file=None, train_batch_size=32,
               use_cuda=True, validation_fraction=0.1, warmup_proportion=0.1)


In [16]:
%%time
## Train the model using our data (this could take a while)

model.fit(X_train, y_train)

accy = model.score(X_test, y_test)

Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 720, validation data size: 80



Training  :   0%|          | 0/23 [00:00<?, ?it/s][A
Training  :   0%|          | 0/23 [00:54<?, ?it/s, loss=1.29][A
Training  :   4%|▍         | 1/23 [00:54<19:48, 54.02s/it, loss=1.29][A
Training  :   4%|▍         | 1/23 [01:47<19:48, 54.02s/it, loss=1.27][A
Training  :   9%|▊         | 2/23 [01:47<18:51, 53.90s/it, loss=1.27][A
Training  :   9%|▊         | 2/23 [02:39<18:51, 53.90s/it, loss=1.25][A
Training  :  13%|█▎        | 3/23 [02:39<17:43, 53.19s/it, loss=1.25][A
Training  :  13%|█▎        | 3/23 [03:26<17:43, 53.19s/it, loss=1.21][A
Training  :  17%|█▋        | 4/23 [03:26<16:17, 51.44s/it, loss=1.21][A
Training  :  17%|█▋        | 4/23 [04:21<16:17, 51.44s/it, loss=1.19][A
Training  :  22%|██▏       | 5/23 [04:21<15:45, 52.54s/it, loss=1.19][A
Training  :  22%|██▏       | 5/23 [05:13<15:45, 52.54s/it, loss=1.17][A
Training  :  26%|██▌       | 6/23 [05:13<14:49, 52.33s/it, loss=1.17][A
Training  :  26%|██▌       | 6/23 [06:01<14:49, 52.33s/it, loss=1.16][A
Trai

Epoch 1, Train loss: 0.9567, Val loss: 0.7739, Val accy: 61.25%



Training  :   0%|          | 0/23 [00:00<?, ?it/s][A
Training  :   0%|          | 0/23 [00:56<?, ?it/s, loss=0.843][A
Training  :   4%|▍         | 1/23 [00:56<20:46, 56.66s/it, loss=0.843][A
Training  :   4%|▍         | 1/23 [02:03<20:46, 56.66s/it, loss=0.861][A
Training  :   9%|▊         | 2/23 [02:03<20:54, 59.74s/it, loss=0.861][A
Training  :   9%|▊         | 2/23 [03:00<20:54, 59.74s/it, loss=0.837][A
Training  :  13%|█▎        | 3/23 [03:00<19:35, 58.76s/it, loss=0.837][A
Training  :  13%|█▎        | 3/23 [03:53<19:35, 58.76s/it, loss=0.839][A
Training  :  17%|█▋        | 4/23 [03:53<18:07, 57.26s/it, loss=0.839][A
Training  :  17%|█▋        | 4/23 [04:43<18:07, 57.26s/it, loss=0.814][A
Training  :  22%|██▏       | 5/23 [04:43<16:27, 54.87s/it, loss=0.814][A
Training  :  22%|██▏       | 5/23 [05:36<16:27, 54.87s/it, loss=0.792][A
Training  :  26%|██▌       | 6/23 [05:36<15:24, 54.40s/it, loss=0.792][A
Training  :  26%|██▌       | 6/23 [06:30<15:24, 54.40s/it, loss=0

Epoch 2, Train loss: 0.8302, Val loss: 0.8497, Val accy: 57.50%



Training  :   0%|          | 0/23 [00:00<?, ?it/s][A
Training  :   0%|          | 0/23 [00:53<?, ?it/s, loss=1.02][A
Training  :   4%|▍         | 1/23 [00:53<19:37, 53.53s/it, loss=1.02][A
Training  :   4%|▍         | 1/23 [01:54<19:37, 53.53s/it, loss=1.03][A
Training  :   9%|▊         | 2/23 [01:54<19:32, 55.83s/it, loss=1.03][A
Training  :   9%|▊         | 2/23 [02:47<19:32, 55.83s/it, loss=0.95][A
Training  :  13%|█▎        | 3/23 [02:47<18:16, 54.83s/it, loss=0.95][A
Training  :  13%|█▎        | 3/23 [03:42<18:16, 54.83s/it, loss=0.911][A
Training  :  17%|█▋        | 4/23 [03:42<17:23, 54.92s/it, loss=0.911][A
Training  :  17%|█▋        | 4/23 [04:35<17:23, 54.92s/it, loss=0.895][A
Training  :  22%|██▏       | 5/23 [04:35<16:20, 54.45s/it, loss=0.895][A
Training  :  22%|██▏       | 5/23 [05:27<16:20, 54.45s/it, loss=0.885][A
Training  :  26%|██▌       | 6/23 [05:27<15:14, 53.79s/it, loss=0.885][A
Training  :  26%|██▌       | 6/23 [06:20<15:14, 53.79s/it, loss=0.849]

Epoch 3, Train loss: 0.7894, Val loss: 0.8008, Val accy: 56.25%



Training  :   0%|          | 0/23 [00:00<?, ?it/s][A
Training  :   0%|          | 0/23 [00:53<?, ?it/s, loss=0.643][A
Training  :   4%|▍         | 1/23 [00:53<19:46, 53.93s/it, loss=0.643][A
Training  :   4%|▍         | 1/23 [01:54<19:46, 53.93s/it, loss=0.653][A
Training  :   9%|▊         | 2/23 [01:54<19:32, 55.82s/it, loss=0.653][A
Training  :   9%|▊         | 2/23 [02:49<19:32, 55.82s/it, loss=0.63] [A
Training  :  13%|█▎        | 3/23 [02:49<18:31, 55.58s/it, loss=0.63][A
Training  :  13%|█▎        | 3/23 [03:39<18:31, 55.58s/it, loss=0.717][A
Training  :  17%|█▋        | 4/23 [03:39<17:05, 53.99s/it, loss=0.717][A
Training  :  17%|█▋        | 4/23 [04:31<17:05, 53.99s/it, loss=0.692][A
Training  :  22%|██▏       | 5/23 [04:31<16:01, 53.43s/it, loss=0.692][A
Training  :  22%|██▏       | 5/23 [05:26<16:01, 53.43s/it, loss=0.677][A
Training  :  26%|██▌       | 6/23 [05:26<15:15, 53.84s/it, loss=0.677][A
Training  :  26%|██▌       | 6/23 [06:17<15:15, 53.84s/it, loss=0.

Epoch 4, Train loss: 0.7023, Val loss: 0.7511, Val accy: 65.00%



Testing:   0%|          | 0/63 [00:00<?, ?it/s][A
Testing:   2%|▏         | 1/63 [00:03<03:17,  3.18s/it][A
Testing:   3%|▎         | 2/63 [00:06<03:09,  3.10s/it][A
Testing:   5%|▍         | 3/63 [00:08<03:02,  3.04s/it][A
Testing:   6%|▋         | 4/63 [00:11<02:56,  2.99s/it][A
Testing:   8%|▊         | 5/63 [00:14<02:52,  2.97s/it][A
Testing:  10%|▉         | 6/63 [00:18<02:59,  3.15s/it][A
Testing:  11%|█         | 7/63 [00:21<02:53,  3.09s/it][A
Testing:  13%|█▎        | 8/63 [00:24<02:51,  3.12s/it][A
Testing:  14%|█▍        | 9/63 [00:27<02:45,  3.07s/it][A
Testing:  16%|█▌        | 10/63 [00:30<02:39,  3.01s/it][A
Testing:  17%|█▋        | 11/63 [00:33<02:34,  2.98s/it][A
Testing:  19%|█▉        | 12/63 [00:36<02:30,  2.95s/it][A
Testing:  21%|██        | 13/63 [00:38<02:26,  2.93s/it][A
Testing:  22%|██▏       | 14/63 [00:41<02:23,  2.92s/it][A
Testing:  24%|██▍       | 15/63 [00:44<02:22,  2.96s/it][A
Testing:  25%|██▌       | 16/63 [00:48<02:21,  3.00s/it]


Loss: 0.8682, Accuracy: 60.40%
CPU times: user 2h 5min 2s, sys: 18min 23s, total: 2h 23min 25s
Wall time: 1h 24min 19s


In [51]:
%%time
## Test out the model with our own invented examples!

examples = [
    'This Android product is not very good',
    'I could not get that iPhone to work, so I sent it back. I''m really upset!',
    'Another great product from the folks at Google!  We really liked it a lot',
    'My iPad is essential - of course I would buy another one!','
    'When in the course of human events it becomes necessary to dissolve those ties...',
    'We the people, in order to form a more perfect union, establish justice, insure domestic tranquility, ...'
]

print(model.predict_proba(examples))
    


Predicting:   0%|          | 0/1 [00:00<?, ?it/s][A
Predicting: 100%|██████████| 1/1 [00:01<00:00,  1.95s/it][A

[[0.21474382 0.08676378 0.6984924 ]
 [0.06963811 0.17175971 0.7586022 ]
 [0.05299675 0.7281567  0.21884665]
 [0.07872742 0.5267376  0.39453495]
 [0.04716099 0.6724587  0.2803803 ]]
CPU times: user 3.5 s, sys: 330 ms, total: 3.83 s
Wall time: 2.03 s


In [52]:
model.save('models/model1_128_bb_uncased.mdl')

### max_seq_length = 256

In [None]:
%%time
## Don't use this one - it will take a very long time!

model = BertClassifier(bert_model='bert-base-uncased', label_list=[-1,0,1])
model.max_seq_length = 256
model.train_batch_size = 32
model.learning_rate = 2e-05
model.epochs = 4

print(model)

model.fit(X_train, y_train)

accy = model.score(X_test, y_test)

### max_seq_length = 512

In [None]:
%%time
## Don't use this one - it will take the longest of all!

model = BertClassifier(bert_model='bert-base-uncased', label_list=[-1,0,1])
model.max_seq_length = 512

# max_seq_length=512 will use a lot more GPU mem, so I am turning down batch size 
# and adding gradient accumulation steps
model.train_batch_size = 16
model_gradient_accumulation_steps = 4

model.learning_rate = 2e-05
model.epochs = 4

print(model)

model.fit(X_train, y_train)

accy = model.score(X_test, y_test)