In [1]:
import sys
from absl import flags
# Workaround so absl flags count as "parsed" inside the notebook kernel:
# replace the kernel's argv with a single placeholder entry and run the absl
# parser over it once.
# NOTE(review): the recorded output is the same one-element list, which
# suggests absl treated the string as the program name rather than parsing it
# as a flag -- confirm. This also overwrites sys.argv for the whole session.
sys.argv=['preserve_unused_tokens=False']
flags.FLAGS(sys.argv)

['preserve_unused_tokens=False']

In [2]:
pip install bert-tensorflow

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Download the BERT tokenization script published by Google in the
# tensorflow/models repository (fetched from the moving `master` branch).
# NOTE(review): the import cell below uses `from bert import tokenization`
# (the pip package), so this downloaded file appears to be unused -- confirm.
!wget --quiet https://raw.githubusercontent.com/tensorflow/models/master/official/nlp/bert/tokenization.py

In [16]:
from random import randint
from numpy import array
from numpy import argmax
import keras.backend as K
from tensorflow.keras import models
from numpy import array_equal
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Bidirectional
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Input
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow.keras.callbacks import ModelCheckpoint
from bert import tokenization
from tensorflow.keras.layers import Lambda
from tensorflow.keras import backend as K


In [5]:
# --- Notebook configuration -------------------------------------------------
# Mini-batch size handed to model.fit().
batch_sz = 7

# The remaining values are not referenced by the cells shown in this notebook.
# NOTE(review): max_seq_length is 128 while bert_encode()/build_model() are
# called with max_len=160 further down -- confirm which one is intended.
max_seq_length = 128
embedding_dim = 768
units = 512
attention_features_shape = 64
image_shape = (256, 256, 3)
trainable = False
BUFFER_SIZE = 200

In [6]:
# Load the pre-computed image embeddings from disk and display their shape
# (the recorded output shows (8313, 100, 1024)).
img_feature = np.load('Image_Embeddings/Emb_feature/new_test_features/.npy')
img_feature.shape

(8313, 100, 1024)

In [7]:
# Read the dataset and build the text fed to BERT: the generated image
# caption followed by the text attached to the image.
FMS = pd.read_csv("FMS_final.csv")

caption_part = 'In the picture ' + FMS['gen_caption']
text_part = ' And the text says: ' + FMS['text']
FMS['textNdesc'] = caption_part + text_part

# Show the first combined sample and the number of rows.
print(FMS['textNdesc'][0], '\n\n')
print(len(FMS))

In the picture man in black and white cap is holding up sign . And the text says: its their character not their color that matters 


8313


In [8]:
class BahdanauAttention(tf.keras.Model):
    """Additive attention over a sequence of feature vectors.

    Each position of ``features`` is scored against ``hidden`` with
    ``V(tanh(W1(features) + W2(hidden)))``; the scores are softmax-normalised
    along axis 1 and used to form a weighted sum of ``features``.
    """

    def __init__(self, units):
        """Create the three dense projections; ``units`` is the width of W1/W2."""
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, features, hidden):
        """Return ``(context_vector, attention_weights)``.

        As called from build_model, ``features`` is the encoder LSTM's
        per-step output and ``hidden`` a single state vector per sample.
        """
        # Insert an axis at position 1 so the query broadcasts against features.
        query = tf.expand_dims(hidden, 1)
        projected = tf.nn.tanh(self.W1(features) + self.W2(query))

        # One scalar score per position, normalised along axis 1.
        weights = tf.nn.softmax(self.V(projected), axis=1)

        # Weighted sum of the features along axis 1.
        context = tf.reduce_sum(weights * features, axis=1)
        return context, weights

In [9]:
def bert_encode(texts, tokenizer, max_len=512):
    """Convert raw texts into fixed-length BERT input arrays.

    Each text is tokenized, truncated to ``max_len - 2`` tokens, wrapped in
    ``[CLS]`` ... ``[SEP]`` and right-padded with zeros up to ``max_len``.

    Args:
        texts: iterable of strings.
        tokenizer: object providing ``tokenize(text)`` and
            ``convert_tokens_to_ids(tokens)``.
        max_len: length of every encoded row.

    Returns:
        Tuple ``(token_ids, masks, segment_ids)`` of numpy arrays with one
        row per text. ``masks`` is 1 on real tokens and 0 on padding;
        ``segment_ids`` is all zeros.
    """
    id_rows, mask_rows, segment_rows = [], [], []

    for raw in texts:
        # Reserve two positions for the [CLS] and [SEP] markers.
        pieces = tokenizer.tokenize(raw)[:max_len - 2]
        sequence = ["[CLS]"] + pieces + ["[SEP]"]

        n_real = len(sequence)
        n_pad = max_len - n_real

        id_rows.append(tokenizer.convert_tokens_to_ids(sequence) + [0] * n_pad)
        mask_rows.append([1] * n_real + [0] * n_pad)
        segment_rows.append([0] * max_len)

    return np.array(id_rows), np.array(mask_rows), np.array(segment_rows)

In [10]:
def build_model(bert_layer, max_len):
    """Build and compile the BERT + image-feature classifier with attention.

    Text branch: ``bert_layer`` is called on ``[word_ids, mask, segment_ids]``
    and the vector at position 0 of its sequence output (the ``[CLS]`` slot
    produced by ``bert_encode``) is used as the text representation.

    Image branch: a ``(100, 1024)`` feature sequence is run through an encoder
    LSTM. A decoder LSTM, initialised with the encoder's final states, is then
    unrolled for a fixed number of steps; at every step it receives the
    attention context over the encoder outputs concatenated with the text
    vector and emits one sigmoid prediction. The model output is the average
    of the per-step predictions.

    Args:
        bert_layer: callable returning a pair whose second element is the
            per-token sequence output for ``[word_ids, mask, segment_ids]``.
        max_len: length of the tokenised text inputs.

    Returns:
        A compiled ``Model`` taking
        ``[input_word_ids, input_mask, segment_ids, encoder_inputs]`` and
        producing one sigmoid score per sample.
    """
    lstm_units = 1024    # shared by encoder LSTM, decoder LSTM and attention
    decoder_steps = 16   # number of attention/decoder iterations to average

    # --- Text branch --------------------------------------------------------
    input_word_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
    input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="input_mask")
    segment_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32, name="segment_ids")
    _, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])
    clf_output = sequence_output[:, 0, :]
    # Add a length-1 time axis so the vector can be fed to the decoder LSTM.
    dec_input = tf.expand_dims(clf_output, axis=1)

    # --- Image branch (encoder) ---------------------------------------------
    encoder_inputs = Input(shape=(100, 1024), name='encoder_inputs')
    encoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True, name='encoder_lstm')
    # No explicit initial_state: the LSTM starts from zeros by default, and a
    # hard-coded tf.zeros((batch_sz, ...)) would tie the graph to one batch
    # size and break on any batch of a different size (e.g. a final partial
    # batch).
    encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)

    # --- Decoder layers (created once, reused at every step) ------------------
    decoder_lstm = LSTM(lstm_units, return_state=True, name='decoder_lstm')
    decoder_fc1 = tf.keras.layers.Dense(1024)
    decoder_fc2 = tf.keras.layers.Dense(128, activation='relu', name='Dense_3_layer')
    decoder_dropout = tf.keras.layers.Dropout(0.3)
    decoder_fc3 = tf.keras.layers.Dense(1, activation='sigmoid', name='classifier')
    attention = BahdanauAttention(lstm_units)

    # --- Unrolled decoding loop ---------------------------------------------
    all_outputs = []
    decoder_outputs = encoder_state_h              # first attention query
    states = [encoder_state_h, encoder_state_c]    # decoder starts from encoder states

    for _step in range(decoder_steps):
        # Attend over the encoder outputs using the previous decoder output.
        context_vector, _attention_weights = attention(encoder_outputs, decoder_outputs)
        inputs = tf.concat([tf.expand_dims(context_vector, 1), dec_input], axis=-1)

        decoder_outputs, state_h, state_c = decoder_lstm(inputs, initial_state=states)
        x = decoder_fc1(decoder_outputs)
        x = decoder_fc2(x)
        x = decoder_dropout(x)
        all_outputs.append(decoder_fc3(x))

        # Only the LSTM states and the decoder output (the next attention
        # query) are carried into the next iteration.
        states = [state_h, state_c]

    # Final score: mean of the per-step sigmoid predictions.
    decoder_outputs = tf.keras.layers.Average()(all_outputs)

    model_encoder_decoder_Bahdanau_Attention = Model(
        [input_word_ids, input_mask, segment_ids, encoder_inputs],
        decoder_outputs,
        name='model_encoder_decoder',
    )
    # `learning_rate` replaces the deprecated `lr` keyword.
    model_encoder_decoder_Bahdanau_Attention.compile(
        Adam(learning_rate=1e-5),
        loss='binary_crossentropy',
        metrics=['accuracy', 'AUC'],
    )

    return model_encoder_decoder_Bahdanau_Attention

In [11]:
%%time
# Load the pre-trained BERT encoder from TF Hub as a Keras layer.
# model_url  : the checkpoint that is actually loaded below.
# model_url2 : alternative checkpoint; assigned but not used in the cells shown.
# NOTE(review): the layer is created with trainable=True; the `trainable`
# value from the configuration cell is not consulted here -- confirm intent.
model_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/1"
model_url2 = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-768_A-12/1"
bert_layer = hub.KerasLayer(model_url, trainable=True)

Wall time: 14.1 s


In [12]:
# Build the tokenizer from the vocabulary file and casing flag stored with the
# loaded BERT layer.
bert_assets = bert_layer.resolved_object
vocab_file = bert_assets.vocab_file.asset_path.numpy()
do_lower_case = bert_assets.do_lower_case.numpy()
tokenizer = tokenization.FullTokenizer(
    vocab_file=vocab_file,
    do_lower_case=do_lower_case,
)

In [13]:
# Labels, and the (token ids, masks, segment ids) triple produced by
# bert_encode for every combined caption+text string, at 160 tokens per row.
train_labels = FMS['label'].values
train_input = bert_encode(FMS['textNdesc'].values, tokenizer, max_len=160)

In [14]:
# Build and compile the model; max_len has to match the value passed to
# bert_encode when the training inputs were created (160). Then print the
# layer-by-layer summary.
model = build_model(bert_layer, max_len=160)
model.summary()

Model: "model_encoder_decoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_inputs (InputLayer)    [(None, 100, 1024)]  0           []                               
                                                                                                  
 input_word_ids (InputLayer)    [(None, 160)]        0           []                               
                                                                                                  
 input_mask (InputLayer)        [(None, 160)]        0           []                               
                                                                                                  
 segment_ids (InputLayer)       [(None, 160)]        0           []                               
                                                                              

  super().__init__(name, **kwargs)


In [18]:
%%time
checkpoint = ModelCheckpoint('model6.h5', monitor='val_loss', save_best_only=True)

train_history = model.fit(
    [train_input, img_feature], train_labels,
    validation_split=0.3,
    epochs=100 ,
    callbacks=[checkpoint],
    batch_size=batch_sz,
    steps_per_epoch=12   
)

Epoch 1/100
 1/12 [=>............................] - ETA: 5:52 - loss: 0.8331 - accuracy: 0.4286 - auc: 0.4583