

# Stress Detection

## Import and Install Packages

In [3]:


!pip install pydot --quiet
!pip install gensim==3.8.3 --quiet
!pip install tensorflow-datasets --quiet
!pip install -U tensorflow-text==2.8.2 --quiet
!pip install transformers --quiet
!pip install pydot --quiet

[K     |████████████████████████████████| 24.2 MB 1.5 MB/s 
[K     |████████████████████████████████| 4.9 MB 4.8 MB/s 
[K     |████████████████████████████████| 4.7 MB 4.9 MB/s 
[K     |████████████████████████████████| 596 kB 68.9 MB/s 
[K     |████████████████████████████████| 6.6 MB 52.8 MB/s 
[K     |████████████████████████████████| 101 kB 14.6 MB/s 
[?25h

In [4]:
# import packages
import pandas as pd
import numpy as np
import tensorflow as tf
import transformers
from transformers import BertTokenizer, TFBertModel
import tqdm
from keras.preprocessing import sequence
import tensorflow_text as tf_text

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
# Set directory to the directory where the data file is located
os.chdir('/content/drive/MyDrive/wzx/data')

In [6]:
### Read data
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [7]:
# Display first two rows
train.head(2)

Unnamed: 0,reviewText,rating_label
0,Hated,0
1,I understand people's frustration and i don't ...,0


In [None]:
## Display first data point text
train['text'][0]

'He said he had not felt that way before, suggeted I go rest and so ..TRIGGER AHEAD IF YOUI\'RE A HYPOCONDRIAC LIKE ME: i decide to look up "feelings of doom" in hopes of maybe getting sucked into some rabbit hole of ludicrous conspiracy, a stupid "are you psychic" test or new age b.s., something I could even laugh at down the road. No, I ended up reading that this sense of doom can be indicative of various health ailments; one of which I am prone to.. So on top of my "doom" to my gloom..I am now f\'n worried about my heart. I do happen to have a physical in 48 hours.'

In [8]:
bert_tokenizer = transformers.BertTokenizer.from_pretrained('bert-large-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-cased')

Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/502M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:

############################################# BERT  #############################################

## Prepare Data & Bert Embedding

In [12]:
#### Prepare Data

# Training
train_list = []
train_label = []
for index, row in train.iterrows():
  train_list.append(row['reviewText'])
  train_label.append(row['rating_label'])

# Testing
test_list = []
test_label = []
for index, row in test.iterrows():
  test_list.append(row['reviewText'])
  test_label.append(row['rating_label'])

print("Number of Training Data:",len(train_label))
print("Number of Test Data:",len(test_label))

Number of Training Data: 467756
Number of Test Data: 146175


In [None]:
# Set token size to be 50
max_length = 50 

x_train = bert_tokenizer(train_list, 
              max_length=max_length,
              truncation=True,
              padding='max_length', 
              return_tensors='tf')
y_train = train_label

x_test = bert_tokenizer(test_list, 
              max_length=max_length,
              truncation=True,
              padding='max_length', 
              return_tensors='tf')
y_test = test_label

## BERT Model for Stress Detection

In [23]:
#### 1. Bert Model
def create_bert_model(hidden_size = 100, 
                          dropout=0.3,
                          learning_rate=0.00005,               
                      ):
    """
    Build a simple classification model with BERT
    """
    # Inputs
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='input_ids_layer') 
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}         

    bert_out = bert_model(bert_inputs) 

    # Take average of classification token
    avg_token = tf.math.reduce_mean(bert_out[0],axis=1)

    # Dense Hidden Layer
    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(avg_token)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)  


    # concat_layer = tf.keras.layers.concatenate([layer1, layer2, layer3])
    classification = tf.keras.layers.Dense(1, activation='sigmoid',name='classification_layer')(hidden)

    # Model Input & Output
    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=[classification])
    
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                            loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), 
                            metrics='accuracy') 

    ### END YOUR CODE
    
    return classification_model

In [24]:
bert_model = create_bert_model()
bert_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 attention_mask_layer (InputLay  [(None, 50)]        0           []                               
 er)                                                                                              
                                                                                                  
 input_ids_layer (InputLayer)   [(None, 50)]         0           []                               
                                                                                                  
 token_type_ids_layer (InputLay  [(None, 50)]        0           []                               
 er)                                                                                              
                                                                                              

In [None]:
## BERT MODEL TRAINING
checkpoint_path = '/content/drive/MyDrive/wzx/stress_model/stress_weights.h5'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path ,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

bert_model.fit([x_train.input_ids, x_train.token_type_ids, x_train.attention_mask], 
                                                  np.array(y_train),   
                                                  validation_data=([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask], np.array(y_test)),    
                                                  batch_size=8, 
                                                  epochs=10,
                                                  callbacks=[model_checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3d4f132890>

## Import Model & Model Weight

In [None]:
##### TEST 
checkpoint_path = '/content/drive/MyDrive/wzx/stress_model'
model = create_bert_model()
model.load_weights(checkpoint_path+'/bert_weights.h5')

In [None]:
score = model.evaluate([x_test.input_ids, x_test.token_type_ids, x_test.attention_mask], np.array(y_test))

