# Baseline Model



## Mount Shared Google Drive

In [1]:
# Mount Google Drive at /content/drive; the data and checkpoint paths used
# later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Fixed token length used by the tokenizer calls and the model input layers.
# NOTE(review): the same value is assigned again in the tokenization cell.
max_length = 50

#### 5.1 Load Polarity Model

In [None]:
# Install the notebook's dependencies into the Colab runtime.
# NOTE(review): only gensim and tensorflow-text are version-pinned here; the
# other packages resolve to whatever pip selects when the cell is run.
!pip install gensim==3.8.3 --quiet
!pip install tensorflow-datasets --quiet
!pip install -U tensorflow-text==2.8.2 --quiet
!pip install pydot --quiet
!pip install transformers --quiet

[K     |████████████████████████████████| 24.2 MB 1.4 MB/s 
[K     |████████████████████████████████| 4.9 MB 12.1 MB/s 
[K     |████████████████████████████████| 4.7 MB 11.5 MB/s 
[K     |████████████████████████████████| 6.6 MB 51.9 MB/s 
[K     |████████████████████████████████| 101 kB 13.2 MB/s 
[K     |████████████████████████████████| 596 kB 61.8 MB/s 
[?25h

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Embedding, Input, Dense, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow_datasets as tfds
import tensorflow_text as tf_text


import sklearn as sk
import os
import nltk
from nltk.corpus import reuters
from nltk.data import find

import matplotlib.pyplot as plt
import transformers
from transformers import BertTokenizer, TFBertModel

import re
import gensim
import pandas as pd
import numpy as np
from transformers import BertTokenizer, TFBertModel

In [None]:
# Set Directory
# Make the shared data folder the working directory (the CSV reads further
# down still use absolute paths, so they do not depend on this).
os.chdir('/content/drive/MyDrive/wzx/data')

In [None]:
# Show the files in the current working directory.
os.listdir()

['new_balanced_df.csv',
 'train.csv',
 'valid.csv',
 'test.csv',
 'Electronics_new_balanced_df.csv',
 'twitter_processed.csv',
 'train_amazon_full.csv',
 'valid_amazon_full.csv',
 'test_amazon_full.csv']

## Data

In [None]:
# Read Data
def _read_split(csv_path):
    """Load one CSV split and coerce its reviewText column to str."""
    frame = pd.read_csv(csv_path)
    frame['reviewText'] = frame['reviewText'].astype(str)
    return frame


# For a quick smoke run, subsample a split with .sample(n = 1000).
train = _read_split('/content/drive/MyDrive/wzx/data/train_amazon_full.csv')
test = _read_split('/content/drive/MyDrive/wzx/data/test_amazon_full.csv')
valid = _read_split('/content/drive/MyDrive/wzx/data/valid_amazon_full.csv')

In [None]:
# Preview the first rows of the training split.
train.head()

Unnamed: 0,reviewText,rating_label
0,UPDATED (title also was meant to say med not l...,0
1,This is the 3 pair that don't wark i would not...,0
2,Nice comfortable belt.,1
3,"it was long and boring,,the historical facts w...",0
4,I have never had wireless headphones and these...,1


In [None]:
# Pull the review texts and labels out of each split as plain Python lists.
# Column-wise .tolist() yields the same lists as a row-by-row iterrows() loop
# without building one Series per row.

# Training
train_list = train['reviewText'].tolist()
train_label = train['rating_label'].tolist()

# Valid
valid_list = valid['reviewText'].tolist()
valid_label = valid['rating_label'].tolist()

# Testing
test_list = test['reviewText'].tolist()
test_label = test['rating_label'].tolist()

print("Number of Training Data:",len(train_label))
print("Number of Valid Data:",len(valid_label))
print("Number of Test Data:",len(test_label))

Number of Training Data: 1367328
Number of Valid Data: 455776
Number of Test Data: 455780


## Model Training

In [None]:
# Data Preprocessing
# Tokenizer for the same 'bert-base-cased' checkpoint name that
# create_baseline_model loads.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

Downloading vocab.txt:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
max_length = 50 # can set to 100


def _tokenize(texts):
    """Encode a list of review strings as TF tensors of length max_length.

    Sequences longer than max_length are truncated and shorter ones are
    padded up to it.
    """
    return bert_tokenizer(texts,
                          max_length=max_length,
                          truncation=True,
                          padding='max_length',
                          return_tensors='tf')


# Same order as before: train, then valid, then test.
x_train, y_train = _tokenize(train_list), train_label
x_valid, y_valid = _tokenize(valid_list), valid_label
x_test, y_test = _tokenize(test_list), test_label

In [None]:
## Load baseline model
def create_baseline_model(train_layers=-1,
                          hidden_size = 100, 
                          dropout=0.3,
                          learning_rate=0.00005):
    """Build and compile the BERT-based binary classifier used as the baseline.

    The network feeds three integer inputs into a pretrained
    'bert-base-cased' encoder, then applies a ReLU dense layer, dropout and a
    single sigmoid unit.

    Args:
        train_layers: Accepted for interface compatibility; the body never
            reads it.
        hidden_size: Width of the dense layer on top of the BERT output.
        dropout: Dropout rate applied after the dense layer.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled tf.keras.Model taking [input_ids, token_type_ids,
        attention_mask] and producing one sigmoid output, trained with binary
        cross-entropy and reporting accuracy.

    Note:
        The input length comes from the notebook-level ``max_length``
        variable, so it must be defined before this function is called.
    """
    encoder = TFBertModel.from_pretrained('bert-base-cased')

    def _int_input(layer_name):
        # All three BERT inputs share the same shape and dtype.
        return tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name=layer_name)

    # Inputs (created in the same order as the model's input list below).
    input_ids = _int_input('input_ids_layer_baseline')
    token_type_ids = _int_input('token_type_ids_layer_baseline')
    attention_mask = _int_input('attention_mask_layer_baseline')

    encoder_outputs = encoder({'input_ids': input_ids,
                               'token_type_ids': token_type_ids,
                               'attention_mask': attention_mask})

    # NOTE(review): index 1 is presumably the pooled sequence representation
    # (pooler_output) - confirm against the installed transformers version.
    pooled = encoder_outputs[1]

    # Classification head: dense -> dropout -> sigmoid.
    features = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer_baseline')(pooled)
    features = tf.keras.layers.Dropout(dropout)(features)
    classification = tf.keras.layers.Dense(1, activation='sigmoid',name='classification_layer_baseline')(features)

    classification_model = tf.keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=[classification])

    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # The head already applies a sigmoid, hence from_logits=False.
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    classification_model.compile(optimizer=adam, loss=bce, metrics='accuracy')

    return classification_model

In [None]:
# Build the baseline classifier with its default hyperparameters and print
# the layer summary.
baseline_model = create_baseline_model()
baseline_model.summary()

Downloading tf_model.h5:   0%|          | 0.00/502M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 attention_mask_layer_baseline   [(None, 50)]        0           []                               
 (InputLayer)                                                                                     
                                                                                                  
 input_ids_layer_baseline (Inpu  [(None, 50)]        0           []                               
 tLayer)                                                                                          
                                                                                                  
 token_type_ids_layer_baseline   [(None, 50)]        0           []                               
 (InputLayer)                                                                                 

In [None]:
### Baseline MODEL TRAINING
checkpoint_path = '/content/drive/MyDrive/wzx/baseline/baseline_amazon_full_best_weights.h5'

# Write weights only when validation accuracy improves, so the file always
# holds the best epoch seen so far.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True)

# Input order must match the model's inputs: ids, token types, attention mask.
train_inputs = [x_train.input_ids, x_train.token_type_ids, x_train.attention_mask]
valid_inputs = [x_valid.input_ids, x_valid.token_type_ids, x_valid.attention_mask]

baseline_model.fit(train_inputs,
                   np.array(y_train),
                   validation_data=(valid_inputs, np.array(y_valid)),
                   batch_size=64,
                   epochs=5,
                   callbacks=[model_checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4102a4eb50>

In [None]:
# Rebuild the architecture and restore the weights from the same file the
# training checkpoint callback writes (best validation accuracy), so this
# cell can run without repeating training.
baseline_model_path = '/content/drive/MyDrive/wzx/baseline/baseline_amazon_full_best_weights.h5'

baseline_model = create_baseline_model()
baseline_model.load_weights(baseline_model_path)

Downloading tf_model.h5:   0%|          | 0.00/502M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Evaluate the restored model on the test split; score holds the loss first
# and the accuracy metric second.
test_inputs = [x_test.input_ids, x_test.token_type_ids, x_test.attention_mask]
test_targets = np.array(y_test)
score = baseline_model.evaluate(test_inputs, test_targets)

print('Test loss:', score[0]) 
print('Test accuracy:', np.round(score[1], 5))

Test loss: 0.15862727165222168
Test accuracy: 0.949


In [None]:
import pandas as pd
import numpy as np

def generate_txt_pred(model, model_input, text, truth):
  """Score one batch and return a per-example prediction table.

  Args:
    model: Object exposing ``predict_on_batch(model_input)`` that returns the
      predicted probabilities as an array-like.
    model_input: Passed through unchanged to ``model.predict_on_batch``.
    text: Sequence of raw texts, one per example.
    truth: Sequence of ground-truth labels, one per example.

  Returns:
    pandas.DataFrame with columns ``Text``, ``Predicted_Prob``, ``Label``,
    ``Predicted_Label`` (probability rounded to 0.0 / 1.0) and ``Correct``
    (whether the rounded prediction equals the label). An empty batch yields
    an empty frame with the same columns.
  """
  # Flatten to 1-D float64. reshape(-1) also handles an empty batch and
  # predictions that are already 1-D, both of which np.concatenate rejects.
  pred_prob = np.asarray(model.predict_on_batch(model_input), dtype=np.float64).reshape(-1)
  output = pd.DataFrame({
      'Text': text,
      'Predicted_Prob': pred_prob,
      'Label': truth,
  })
  # round(0) rounds halves to the nearest even value, so a probability of
  # exactly 0.5 is labelled 0.
  output['Predicted_Label'] = output['Predicted_Prob'].round(0)
  output['Correct'] = output['Predicted_Label'] == output['Label']
  return output

In [None]:
# Chunking parameters for scoring the test set piece by piece.
chunk_size = 3000                     # rows scored per prediction call
total = len(x_test.input_ids)         # number of tokenized test examples
chunk_num = total // chunk_size       # count of complete chunks (remainder excluded)



In [None]:
# Display the number of complete chunks.
chunk_num

151

In [None]:

# Score the test set chunk by chunk so that each predict_on_batch call
# receives at most chunk_size rows. Iterating over start offsets covers the
# final partial chunk in the same loop, so nothing depends on the loop
# variable after the loop and no empty slice is ever sent to the model.
labels = np.array(y_test)              # converted once, not per iteration
texts = test['reviewText'].values
num_rows = len(labels)

chunk_frames = []
for i, start in enumerate(range(0, num_rows, chunk_size)):
  print(f'chunk {i}')
  stop = min(start + chunk_size, num_rows)
  out_df = generate_txt_pred(baseline_model, [x_test.input_ids[start:stop], 
                                              x_test.token_type_ids[start:stop], 
                                              x_test.attention_mask[start:stop]], 
                                              list(texts[start:stop]), 
                                              labels[start:stop])
  chunk_frames.append(out_df)

# Concatenate once at the end: DataFrame.append copied the whole frame on
# every call and no longer exists in pandas 2.x. ignore_index gives the same
# 0..n-1 index the CSV has when it is read back.
test_output = pd.concat(chunk_frames, ignore_index=True) if chunk_frames else pd.DataFrame()


test_output.to_csv('/content/drive/MyDrive/wzx/baseline/baseline_amazon_full_test_output.csv', index = False)

chunk 0
chunk 1
chunk 2
chunk 3
chunk 4
chunk 5
chunk 6
chunk 7
chunk 8
chunk 9
chunk 10
chunk 11
chunk 12
chunk 13
chunk 14
chunk 15
chunk 16
chunk 17
chunk 18
chunk 19
chunk 20
chunk 21
chunk 22
chunk 23
chunk 24
chunk 25
chunk 26
chunk 27
chunk 28
chunk 29
chunk 30
chunk 31
chunk 32
chunk 33
chunk 34
chunk 35
chunk 36
chunk 37
chunk 38
chunk 39
chunk 40
chunk 41
chunk 42
chunk 43
chunk 44
chunk 45
chunk 46
chunk 47
chunk 48
chunk 49
chunk 50
chunk 51
chunk 52
chunk 53
chunk 54
chunk 55
chunk 56
chunk 57
chunk 58
chunk 59
chunk 60
chunk 61
chunk 62
chunk 63
chunk 64
chunk 65
chunk 66
chunk 67
chunk 68
chunk 69
chunk 70
chunk 71
chunk 72
chunk 73
chunk 74
chunk 75
chunk 76
chunk 77
chunk 78
chunk 79
chunk 80
chunk 81
chunk 82
chunk 83
chunk 84
chunk 85
chunk 86
chunk 87
chunk 88
chunk 89
chunk 90
chunk 91
chunk 92
chunk 93
chunk 94
chunk 95
chunk 96
chunk 97
chunk 98
chunk 99
chunk 100
chunk 101
chunk 102
chunk 103
chunk 104
chunk 105
chunk 106
chunk 107
chunk 108
chunk 109
chunk 110


In [None]:
# Reload the saved predictions so the error analysis below can run without
# re-scoring the test set.
test_output = pd.read_csv('/content/drive/MyDrive/wzx/baseline/baseline_amazon_full_test_output.csv')

In [None]:
# Count correct versus incorrect predictions.
test_output['Correct'].value_counts()

True     429242
False     26538
Name: Correct, dtype: int64

# Error Analysis - Baseline

## Baseline False Negative

### High Probability False Negative

In [None]:
# Wrong predictions of class 0 whose probability was above 0.4, i.e. the
# misses closest to the 0.5 rounding boundary.
is_wrong = test_output['Correct'].eq(False)
predicted_negative = test_output['Predicted_Label'].eq(0)
above_cutoff = test_output['Predicted_Prob'].gt(0.4)
test_output[is_wrong & predicted_negative & above_cutoff].sort_values(by='Predicted_Prob', ascending=True)

Unnamed: 0,Text,Predicted_Prob,Label,Predicted_Label,Correct
57174,It's looks good but the price dam if it was at...,0.400018,1,0.0,False
319113,Entertaining free movie to watch o am a Mark W...,0.400050,1,0.0,False
422465,This was a great hub. Unfortunately it died a...,0.400142,1,0.0,False
254168,They work as advertised. This product didn't s...,0.400146,1,0.0,False
409470,LIGHT WEIGHT AND COMPACT .WORKS LIKE A DREAM. ...,0.400237,1,0.0,False
...,...,...,...,...,...
344064,I can't say enough about this film version of ...,0.499915,1,0.0,False
38519,I got this (CM500) and the TP-LINK Archer C7 A...,0.499918,1,0.0,False
191845,Great movie. Is this the unedited version whe...,0.499974,1,0.0,False
379656,I ordered this one for my sister's Christmas g...,0.499992,1,0.0,False


In [None]:
# Full review text for row 57174.
test_output.loc[57174, 'Text']

"It's looks good but the price dam if it was at 227.99 or 225.99 I would of purchase it"

In [None]:
# Full review text for row 254168.
test_output.loc[254168, 'Text']

"They work as advertised. This product didn't shake my world, but I'm 68, everything about me shakes.  So if you are looking for a life altering experience, this isn't it.\n\nBut...\n\nIf you are looking for a reliable CD-R this is it."

In [None]:
# Full review text for row 86682.
test_output.loc[86682, 'Text']

'I am in the process of moving my pictures off of CDs and onto jump drives and am looking for high quality, high storage devices. Lexar is a trusted brand, so I was excited to find such a deal on this 8GB disk.\n\nI found the disk to perform as expected. It hold loads of pictures, creating an easy way for me to organize my photos from my sisters wedding, from shower to wedding, as well as all the files that go along with planning a wedding. I had no problems with speed as I added files to the disk, t which is a nice aspect of using a smaller size disk.\n\nIn terms of the design of the disk itself, the sleek black look is nice. It has a large hoop on the end, making it super easy to attach to a key chain or other something similar to keep from getting lost.  The blue drive itself closes into the black casing, which makes me feel better knowing the information on it is protected at all costs.  It is nice to know it will not accidentally open when in a bag or purse. Once the drive is pull

In [None]:
# Full review text for row 379656.
test_output.loc[379656, 'Text']

"I ordered this one for my sister's Christmas gift because I liked the one I bought, from another company, so much!  Good wireless distance, page forward/reverse browser buttons and 3 programmable buttons.  I've had the same batteries in mine for 6 months now.  The ball pops right out for easy cleaning.  One of the most important aspects, I've dropped it several times and the worse that happened is the ball come loose (once) and I had to chase it."

The examples above are cases where the baseline model predicts negative but still assigns a relatively high probability compared with its other negative predictions. This can be interpreted as the model seeing signs of positive sentiment but remaining reluctant to predict positive, so it predicts negative with a relatively high predicted probability. The confusion here is that the authors seem to agree that the products are good but still express dissatisfaction for other reasons. In these cases, the labels seem to reflect whether the author thinks well of the product, whereas the model focuses on whether the author sounds happy in the text.

### Low Probability False Negative

In [None]:
# Wrong predictions of class 0 made with a probability below 0.2, i.e. the
# misses the model was most certain about.
is_wrong = test_output['Correct'].eq(False)
predicted_negative = test_output['Predicted_Label'].eq(0)
below_cutoff = test_output['Predicted_Prob'].lt(0.2)
test_output[is_wrong & predicted_negative & below_cutoff].sort_values(by='Predicted_Prob', ascending=True)

Unnamed: 0,Text,Predicted_Prob,Label,Predicted_Label,Correct
240384,"Poor acting, tired storyline.",0.000368,1,0.0,False
329960,"Waste of good actors ! Another ""Good VS Evil""...",0.000372,1,0.0,False
190683,Just no working volume control,0.000379,1,0.0,False
437074,This product failed to work after 6 months. I ...,0.000402,1,0.0,False
94161,I just lost interest quickly.\nDidn't find thi...,0.000404,1,0.0,False
...,...,...,...,...,...
203457,"Excellent documentary. However, it's like a f...",0.199738,1,0.0,False
149190,"""The left handed gun"" was part - with ""The nak...",0.199851,1,0.0,False
346650,I bought this for my Niece & she loved it & st...,0.199892,1,0.0,False
406831,A group of job applicants gather in a board ro...,0.199915,1,0.0,False


In [None]:
# Full review text for row 240384.
test_output.loc[240384, 'Text']


'Poor acting, tired storyline.'

This review has 5 stars, but it clearly shows no sign of anything positive. From the text, we can tell that the author is not happy and yet still gives a positive rating. This is an example of review text that can confuse the model in a sarcastic way. Note, however, that the author uses sarcasm in assigning the label, not in the language used. Therefore, we should expect that our ensemble model cannot learn this subtlety either, as it makes its decision based on the text only.

## Baseline False Positive

### High Probability False Positive

In [None]:
# Wrong predictions of class 1 made with a probability above 0.8, i.e. the
# misses the model was most certain about.
is_wrong = test_output['Correct'].eq(False)
predicted_positive = test_output['Predicted_Label'].eq(1)
above_cutoff = test_output['Predicted_Prob'].gt(0.8)
test_output[is_wrong & predicted_positive & above_cutoff].sort_values(by='Predicted_Prob', ascending=True)

Unnamed: 0,Text,Predicted_Prob,Label,Predicted_Label,Correct
165350,"The aphorism ""you get what you pay for"" applie...",0.800257,0,1.0,False
311236,"We Love Picture 3D Smart Plasma HDTV, sound ha...",0.800669,0,1.0,False
375857,I hate to be one of those guys you always see ...,0.800877,0,1.0,False
78652,this is a really good product i bought a few m...,0.800889,0,1.0,False
383470,While the story keeps us in our seat for 90 mi...,0.800936,0,1.0,False
...,...,...,...,...,...
271897,Just what I need for my tablet; easy to use. ...,0.999009,0,1.0,False
267560,My son wanted this to add to his collection. I...,0.999011,0,1.0,False
67554,My son wanted this to add to his collection. I...,0.999011,0,1.0,False
51594,Exactly as described and fast shipping. Excell...,0.999012,0,1.0,False


In [None]:
# Full review text for row 167740.
test_output.loc[167740, 'Text']

'Love it, comfortable and has nice inside to protect my equipment.'

In [None]:
# Full review text for row 271897.
test_output.loc[271897, 'Text']

'Just what I need for my tablet; easy to use.  A reliable Samsung product; my second purchase within a month (one for my smartphone as well).  You can rely on Samsung.'

In [None]:
# Full review text for row 51594.
test_output.loc[51594, 'Text']

'Exactly as described and fast shipping. Excellent product!'

This is an example of mislabelled data: the text is clearly positive, yet the label is negative (0).

### Low Probability False Positive

In [None]:
# Wrong predictions of class 1 with a probability below 0.6; the last 20 rows
# after an ascending sort are the ones nearest to 0.6.
is_wrong = test_output['Correct'].eq(False)
predicted_positive = test_output['Predicted_Label'].eq(1)
below_cutoff = test_output['Predicted_Prob'].lt(0.6)
test_output[is_wrong & predicted_positive & below_cutoff].sort_values(by='Predicted_Prob', ascending=True).tail(20)

Unnamed: 0,Text,Predicted_Prob,Label,Predicted_Label,Correct
387033,"It works fine. But it uses an odd sized plug, ...",0.598788,0,1.0,False
162714,The speaker looks good and was functional righ...,0.598927,0,1.0,False
354648,When Elvis Presley was near the end of his lif...,0.598962,0,1.0,False
151506,"I'm all for movies that are based on books, I ...",0.598974,0,1.0,False
266265,As a precursor: I'm the owner and lead install...,0.599028,0,1.0,False
128035,There must be a manufacturing plant that's cre...,0.599041,0,1.0,False
216921,"looks like a class project, hopefully they got...",0.599225,0,1.0,False
311165,Despite its popularity I wasn't crazy about th...,0.599275,0,1.0,False
156907,HOW TO TAKE RING ADAPTER OFF from lens. I thi...,0.599445,0,1.0,False
284672,I've been using this mount for over a month no...,0.59946,0,1.0,False


In [None]:
# Full review text for row 344003.
test_output.loc[344003, 'Text']

'I use this for my iPhone 6s and it fit well. Nice first step to VR. Comfortable enough and straps are stable.\nOne major fault is the suction cups to hold the phone in place. It will get loose and slide around. This was a deal breaker for me and bought something else that would keep my phone in place.'

In [None]:
# Full review text for row 143236 (ensemble might work).
test_output.loc[143236, 'Text']

'Big.........so big. Size are differents for lens.'

In [None]:
# Full review text for row 100611 (ensemble might work).
test_output.loc[100611, 'Text']

'Why pay $15 for a harness made from $1 worth of parts?  So that everything will fit with no hassles.  So you don\'t have to reverse engineer the connector.  So you won\'t have to be cutting and stripping cable, and crimping on connectors. To save time.\n\nSo I ordered this cord at the same time as the Escort 9500ix, and a visor bracket.  Overnight, so it would arrive the day before a big road trip.\n\nI arrived home after work to the smiley Amazon box.  Everything was there, and the shipping was overnight as promised. (In fact, it was there at 8:45 the next morning.)  I had already researched where to tap into the car\'s electrical system so started in right away.  Closed the supplied insulation displacement splice onto the correct wire (purple with white stripe, in this case) and fed the wire through to where it needed to pop out of the trim.  Secured the ground screw.  And went to put the 1/4" bladed fastener onto the blue splice connector.  Except that it wasn\'t a 1/4" bladed fast

In [None]:
# Full review text for row 277554 (ensemble might work).
test_output.loc[277554, 'Text']

"Great sound and great features. The radio tuning dial didn't always make the channel indicator move while tuning.\n\nThe picture shows amazon's crappy packaging."

In [None]:
# Full review text for row 435462.
test_output.loc[435462, 'Text']

"I took well over 300 pictures using many combination of settings (auto, scene modes, VR, flash, ISO, etc) at my daughters indoor dance show and unfortunately was not able to find a setting that is going to work acceptably for this situation (indoor, medium light). I found that VR on this camera didn't seem to help much at all if there is much movement in the subject. I also was not able to get pictures of my kids at a birthday party that are acceptable - there is some level of blurriness in almost all of the shots.\n\nI thought I was having some success with the sports mode (those pics looked Ok on the LCD), but once transferred to the PC, they were unacceptable, even though I used the highest resolution available. I see that Nikon even suggests a different camera for people wanting good indoor or low light photo capability in their recent advertising, so even they don't recommend this model for indoor pics.\n\nSo this camera seems good for outdoors with lots of light, when taking pho

In [None]:
# Full review text for row 162714.
test_output.loc[162714, 'Text']

"The speaker looks good and was functional right out of the box. For the price, and based on the other reviews, I had expected the speaker's bass to be less than stellar, but that wasn't the case. I found the bass to be adequate, but the treble range is really disappointing. Sounds like it's coming from laptop speakers or something. There's also a scratchy tissue paper sound on most songs that indicates a poor quality speaker. After trying to listen for about an hour, I can't abide it, and will be returning it immediately."