In [1]:
import numpy as np

import tensorflow as tf
import tensorflow_datasets as tfds
from transformers import TFBertForSequenceClassification, TFBertModel


# 1. Preparation for data inputs

In [2]:
# import the test data (and vocabulary) saved by the data exploration part

# NOTE: allow_pickle=True unpickles the objects stored in the archive, which
# can execute arbitrary code -- only load a data_prep.npz you produced yourself.
# The archive is opened in a `with` block so its file handle is released as
# soon as the arrays have been read.
with np.load('data_prep.npz',allow_pickle=True) as data_prep:
    # train_text,train_label=data_prep['train_text'],data_prep['train_label']
    # val_text,val_label=data_prep['val_text'],data_prep['val_label']
    test_text,test_label=data_prep['test_text'],data_prep['test_label']
    voca_set=data_prep['voca_set'].tolist()

# define encoder and tokenizer
tokenizer=tfds.deprecated.text.Tokenizer()
encoder=tfds.deprecated.text.TokenTextEncoder(
    voca_set,tokenizer=tokenizer)

In [3]:
# for Bert model
# ---------------
# import the test data prepared for BERT by the data exploration part;
# the archive is opened in a `with` block so its file handle is released
# as soon as the arrays have been read

with np.load('data_prep_bert.npz') as data_prep:

    # train_reviews=data_prep['train_reviews']
    # train_segments=data_prep['train_segments']
    # train_masks=data_prep['train_masks']
    # train_label_bert=data_prep['train_label']

    # val_reviews=data_prep['val_reviews']
    # val_segments=data_prep['val_segments']
    # val_masks=data_prep['val_masks']
    # val_label_bert=data_prep['val_label']

    test_reviews=data_prep['test_reviews']
    test_segments=data_prep['test_segments']
    test_masks=data_prep['test_masks']
    test_label_bert=data_prep['test_label']

# function to convert the data into tf.tensor inputs

def example_to_features(input_ids,attention_masks,token_type_ids,label):
    """Pack one example into a (features, label) pair.

    The three model inputs are gathered into a dict keyed by
    'input_ids', 'attention_mask' and 'token_type_ids'; the label is
    passed through untouched as the second element of the returned tuple.
    """
    features=dict(
        input_ids=input_ids,
        attention_mask=attention_masks,
        token_type_ids=token_type_ids)
    return features,label

# train_ds=tf.data.Dataset.from_tensor_slices(
#     (train_reviews,train_masks,train_segments,train_label_bert)).map(
#     example_to_features).shuffle(100).batch(16)

# val_ds=tf.data.Dataset.from_tensor_slices(
#     (val_reviews,val_masks,val_segments,val_label_bert)).map(
#     example_to_features).shuffle(100).batch(16)

# Build the test pipeline step by step: slice the arrays per example,
# turn each example into (features dict, label), then batch by 16.
# The test set is not shuffled, so predictions stay in the stored order.
test_ds=tf.data.Dataset.from_tensor_slices(
    (test_reviews,test_masks,test_segments,test_label_bert))
test_ds=test_ds.map(example_to_features)
test_ds=test_ds.batch(16)

# 2. Predictions from the different models and voting to decide the final outputs

In [10]:
def _bilstm_pred():
    """Rebuild the BiLSTM classifier, load its weights and label the test set."""

    embedding_dim=64

    # size of the vocabulary known to the encoder
    voca_size=encoder.vocab_size

    # number of RNN units
    rnn_units=64

    # batch size baked into the input shape of the saved architecture
    batch_size=100

    trained_model=tf.keras.Sequential([
        tf.keras.layers.Embedding(voca_size,embedding_dim,
                                  mask_zero=True,
                                  batch_input_shape=[batch_size,None]),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(rnn_units,dropout=0.25)),
        tf.keras.layers.Dense(1,activation='sigmoid')
    ])

    # load the model weights
    trained_model.load_weights('model_param/BiLSTM.h5')

    # use the model to make prediction on the test set;
    # the sigmoid output is thresholded at 0.5 to get 0/1 labels
    test_outputs=trained_model(test_text)
    return ((test_outputs.numpy()>0.5)*1).flatten()


def _glove_pred(weight_file):
    """Rebuild the GloVe-embedding BiLSTM, load `weight_file`, label the test set."""

    embedding_dim=50

    # load the GloVe embeddings: each line is a word followed by its vector
    dict_w2v={}

    with open(
        'GloVe_dict/glove.6B/glove.6B.50d.txt','r',encoding="utf8") as file:
        for line in file:
            tokens=line.split()
            if not tokens:
                # skip blank lines instead of failing on tokens[0]
                continue
            word=tokens[0]
            vector=np.array(tokens[1:],dtype=np.float32)

            if vector.shape[0]==embedding_dim:
                dict_w2v[word]=vector
            else:
                print('Error with '+word)

    # a matrix relating the vocas in our text set to the GloVe dict;
    # rows of words missing from GloVe stay at zero
    embedding_matrix=np.zeros((encoder.vocab_size,embedding_dim))

    for word in encoder.tokens:
        embedding_vec=dict_w2v.get(word)

        if embedding_vec is not None:
            tkn_id=encoder.encode(word)[0]
            embedding_matrix[tkn_id]=embedding_vec

    # number of RNN units
    rnn_units=64

    trained_model=tf.keras.Sequential([
        tf.keras.layers.Embedding(encoder.vocab_size,embedding_dim,
                                  mask_zero=True,
                                  weights=[embedding_matrix],
                                  trainable=False),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(rnn_units,dropout=0.25)),
        tf.keras.layers.Dense(1,activation='sigmoid')
    ])

    # load the model weights
    trained_model.load_weights(weight_file)

    test_outputs=trained_model(test_text)
    return ((test_outputs.numpy()>0.5)*1).flatten()


def _bert_pretrained_head_pred():
    """Label the test set with TFBertForSequenceClassification and its saved weights."""

    bert=TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased')

    bert.load_weights('model_param/Bert_pretrained_bert.h5')

    pred=bert.predict(test_ds)

    # predicted class = index of the largest class probability
    return tf.argmax(
        tf.nn.softmax(pred.logits,axis=1),axis=1).numpy()


def _bert_custom_head_pred(weight_file):
    """Rebuild TFBertModel plus the custom dense head, load `weight_file`, label the test set."""

    model0=TFBertModel.from_pretrained('bert-base-uncased')

    # create the inputs for the BERT model
    max_seq_len=512
    # the shape is given as an explicit 1-tuple; `(max_seq_len)` is just an int
    inp_ids=tf.keras.layers.Input(
        (max_seq_len,),dtype=tf.int64,name='input_ids')
    att_mask=tf.keras.layers.Input(
        (max_seq_len,),dtype=tf.int64,name='attention_mask')
    seg_ids=tf.keras.layers.Input(
        (max_seq_len,),dtype=tf.int64,name='token_type_ids')

    inp_dict={'input_ids':inp_ids,
              'attention_mask':att_mask,
              'token_type_ids':seg_ids}

    output=model0(inp_dict) # from the untrained BERT network
    # dropout layers are called with training=False: inference only
    x=tf.keras.layers.Dropout(0.2)(output[1],training=False)
    x=tf.keras.layers.Dense(200,activation='relu')(x)
    x=tf.keras.layers.Dropout(0.2)(x,training=False)
    x=tf.keras.layers.Dense(2,activation='sigmoid')(x)

    bert=tf.keras.models.Model(inputs=inp_dict,outputs=x)

    # NOTE(review): the output layer already applies a sigmoid while the loss
    # is declared with from_logits=True -- confirm against the training
    # notebook. Left unchanged here because predictions below only use argmax.
    bert.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=2E-5),
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'])

    bert.load_weights(weight_file)

    pred=bert.predict(test_ds)
    return np.argmax(pred,axis=1)


def model_pred(model_i):
    """Return the predicted test-set labels of one trained model.

    Parameters
    ----------
    model_i : str
        Name of the model, one of the following:
            'BiLSTM',
            'GloVe_Feature_Extraction',
            'GloVe_fine-tuning',
            'Bert_pretrained_head',
            'Bert_custom_head',
            'Bert_custom_head_fine_tune'

    Returns
    -------
    A 1D array with the predicted labels, or None (after printing
    'Error in model name!') when the name is not recognised.

    The name is checked before any embedding file is read or any model is
    built, so a mistyped name fails immediately.
    """

    if model_i=='BiLSTM':

        return _bilstm_pred()

    elif 'GloVe' in model_i:

        # pick the weight file first so a bad variant costs nothing
        if 'Feature_Extraction' in model_i:
            weight_file='model_param/GloVe_Feature_Extraction.h5'

        elif 'fine-tuning' in model_i:
            weight_file='model_param/GloVe_fine-tuning.h5'

        else:
            print('Error in model name!')
            return None

        return _glove_pred(weight_file)

    elif 'Bert' in model_i:

        if 'pretrained_head' in model_i:

            return _bert_pretrained_head_pred()

        elif 'custom_head' in model_i:

            if 'fine_tune' in model_i:
                weight_file='model_param/Bert_custom_head_fine_tune.h5'
            else:
                weight_file='model_param/Bert_custom_head.h5'

            return _bert_custom_head_pred(weight_file)

        else:

            print('Error in model name!')
            return None

    else:

        # names matching none of the model families used to fall through
        # silently; report them like the other invalid names
        print('Error in model name!')
        return None
        


In [13]:
# models available:[
#     'BiLSTM',
#     'GloVe_Feature_Extraction',
#     'GloVe_fine-tuning',
#     'Bert_pretrained_head',
#     'Bert_custom_head',
#     'Bert_custom_head_fine_tune']

# enter the models to be included in the Ensemble Method
model_inc=['BiLSTM',
           'GloVe_fine-tuning',
           'Bert_pretrained_head',
           'Bert_custom_head']

# collect one 0/1 vote vector per model
votes=[]
for model_i in model_inc:
    print('working on: '+model_i)
    test_outputs=model_pred(model_i)
    if test_outputs is None:
        # model_pred returns None for a name it does not recognise;
        # stop here with a clear message instead of failing on `None>0.5`
        raise ValueError('no predictions returned for model: '+model_i)
    votes.append(((np.asarray(test_outputs)>0.5)*1).flatten())
print('-'*40)

# number of models voting for label 1, per test example
test_pred=np.sum(votes,axis=0)

# determine the final predicted label by voting: label 1 needs a strict
# majority, so with this even number of models a 2-2 tie resolves to label 0
pred_label=(test_pred>(len(model_inc)/2))*1
test_acc=np.mean(test_label==pred_label)
print('test_acc=',test_acc*100,'%')

working on: BiLSTM
working on: GloVe_fine-tuning
working on: Bert_pretrained_head


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


working on: Bert_custom_head


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


----------------------------------------
test_acc= 94.24 %


In [14]:
# models available:[
#     'BiLSTM',
#     'GloVe_Feature_Extraction',
#     'GloVe_fine-tuning',
#     'Bert_pretrained_head',
#     'Bert_custom_head',
#     'Bert_custom_head_fine_tune']

# enter the models to be included in the Ensemble Method
model_inc=['BiLSTM',
           'GloVe_fine-tuning',
           'Bert_pretrained_head']

# collect one 0/1 vote vector per model
votes=[]
for model_i in model_inc:
    print('working on: '+model_i)
    test_outputs=model_pred(model_i)
    if test_outputs is None:
        # model_pred returns None for a name it does not recognise;
        # stop here with a clear message instead of failing on `None>0.5`
        raise ValueError('no predictions returned for model: '+model_i)
    votes.append(((np.asarray(test_outputs)>0.5)*1).flatten())
print('-'*40)

# number of models voting for label 1, per test example
test_pred=np.sum(votes,axis=0)

# determine the final predicted label by voting (strict majority for label 1)
pred_label=(test_pred>(len(model_inc)/2))*1
test_acc=np.mean(test_label==pred_label)
print('test_acc=',test_acc*100,'%')

working on: BiLSTM
working on: GloVe_fine-tuning
working on: Bert_pretrained_head


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


----------------------------------------
test_acc= 94.15333333333334 %


In [17]:
# models available:[
#     'BiLSTM',
#     'GloVe_Feature_Extraction',
#     'GloVe_fine-tuning',
#     'Bert_pretrained_head',
#     'Bert_custom_head',
#     'Bert_custom_head_fine_tune']

# enter the models to be included in the Ensemble Method
model_inc=['BiLSTM',
           'GloVe_fine-tuning',
           'Bert_custom_head']

# collect one 0/1 vote vector per model
votes=[]
for model_i in model_inc:
    print('working on: '+model_i)
    test_outputs=model_pred(model_i)
    if test_outputs is None:
        # model_pred returns None for a name it does not recognise;
        # stop here with a clear message instead of failing on `None>0.5`
        raise ValueError('no predictions returned for model: '+model_i)
    votes.append(((np.asarray(test_outputs)>0.5)*1).flatten())
print('-'*40)

# number of models voting for label 1, per test example
test_pred=np.sum(votes,axis=0)

# determine the final predicted label by voting (strict majority for label 1)
pred_label=(test_pred>(len(model_inc)/2))*1
test_acc=np.mean(test_label==pred_label)
print('test_acc=',test_acc*100,'%')

working on: BiLSTM
working on: GloVe_fine-tuning
working on: Bert_custom_head


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


----------------------------------------
test_acc= 94.30666666666667 %


In [15]:
# models available:[
#     'BiLSTM',
#     'GloVe_Feature_Extraction',
#     'GloVe_fine-tuning',
#     'Bert_pretrained_head',
#     'Bert_custom_head',
#     'Bert_custom_head_fine_tune']

# enter the models to be included in the Ensemble Method
model_inc=['BiLSTM',
           'Bert_pretrained_head',
           'Bert_custom_head']

# collect one 0/1 vote vector per model
votes=[]
for model_i in model_inc:
    print('working on: '+model_i)
    test_outputs=model_pred(model_i)
    if test_outputs is None:
        # model_pred returns None for a name it does not recognise;
        # stop here with a clear message instead of failing on `None>0.5`
        raise ValueError('no predictions returned for model: '+model_i)
    votes.append(((np.asarray(test_outputs)>0.5)*1).flatten())
print('-'*40)

# number of models voting for label 1, per test example
test_pred=np.sum(votes,axis=0)

# determine the final predicted label by voting (strict majority for label 1)
pred_label=(test_pred>(len(model_inc)/2))*1
test_acc=np.mean(test_label==pred_label)
print('test_acc=',test_acc*100,'%')

working on: BiLSTM
working on: Bert_pretrained_head


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


working on: Bert_custom_head


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


----------------------------------------
test_acc= 94.77333333333333 %


In [18]:
# models available:[
#     'BiLSTM',
#     'GloVe_Feature_Extraction',
#     'GloVe_fine-tuning',
#     'Bert_pretrained_head',
#     'Bert_custom_head',
#     'Bert_custom_head_fine_tune']

# enter the models to be included in the Ensemble Method
model_inc=['GloVe_fine-tuning',
           'Bert_pretrained_head',
           'Bert_custom_head']

# collect one 0/1 vote vector per model
votes=[]
for model_i in model_inc:
    print('working on: '+model_i)
    test_outputs=model_pred(model_i)
    if test_outputs is None:
        # model_pred returns None for a name it does not recognise;
        # stop here with a clear message instead of failing on `None>0.5`
        raise ValueError('no predictions returned for model: '+model_i)
    votes.append(((np.asarray(test_outputs)>0.5)*1).flatten())
print('-'*40)

# number of models voting for label 1, per test example
test_pred=np.sum(votes,axis=0)

# determine the final predicted label by voting (strict majority for label 1)
pred_label=(test_pred>(len(model_inc)/2))*1
test_acc=np.mean(test_label==pred_label)
print('test_acc=',test_acc*100,'%')

working on: GloVe_fine-tuning
working on: Bert_pretrained_head


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


working on: Bert_custom_head


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


----------------------------------------
test_acc= 94.82000000000001 %
