<a href="https://colab.research.google.com/github/wtsyang/UserIntentPrediction/blob/BERT/BERT/BiLSTM-Context-encodeSentence.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
from google.colab import drive # Colab-only helper used to mount Google Drive

ROOT = "/content/drive"     # mount point for Google Drive inside the Colab VM
print(ROOT)                 # echo the mount point (optional)

drive.mount(ROOT)           # mount Google Drive at ROOT; only prints a notice when it is already mounted

/content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
# Work from the project folder on the mounted drive; every relative path below
# ('data/...', 'BERT/...') is resolved against this directory.
%cd '/content/drive/My Drive/UserIntentPrediction'

/content/drive/My Drive/UserIntentPrediction


In [18]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, SpatialDropout1D, add, concatenate
from tensorflow.keras.layers  import LSTM, Bidirectional, GlobalMaxPooling1D, GlobalAveragePooling1D
from tensorflow.keras.preprocessing import text, sequence
from gensim.models import KeyedVectors
from sklearn.model_selection  import train_test_split
import pickle
import sklearn
from tensorflow.keras.utils import multi_gpu_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import backend as K
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import smart_cond
from functools import partial
from sklearn.metrics import precision_recall_fscore_support
print('Tensorflow Version:',tf.__version__)

Tensorflow Version: 2.2.0-rc3


In [0]:
# Hyperparameters and data dimensions shared by the generators and the model.
# NUM_MODELS is not referenced by any of the visible cells.
NUM_MODELS = 1
# Default batch size of the training/validation DataGenerator.
BATCH_SIZE = 32
# Units per direction of each Bidirectional LSTM layer.
LSTM_UNITS = 64
# Width of the pooled features: max- and average-pooling, each over 2*LSTM_UNITS channels.
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS 
# Upper bound on training epochs.
EPOCHS = 15
# Time steps reserved per utterance; the model input holds 3*MAX_LEN steps.
MAX_LEN = 1259
# Feature size of every time step (presumably the BERT hidden size — TODO confirm).
N_CHANNELS=768
# Number of label columns and of sigmoid output heads.
N_CLASS=12

## Loading the dataset


In [0]:
# Load the pre-split train / validation / test tables. Paths are relative to the
# current working directory; reset_index guarantees a 0..n-1 positional index.
Train=pd.read_csv('data/Train_Preprocessing.csv').reset_index(drop=True)
Valid=pd.read_csv('data/Valid_Preprocessing.csv').reset_index(drop=True)
Test=pd.read_csv('data/Test_Preprocessing.csv').reset_index(drop=True)

In [0]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``([X, Pos], Y)`` training batches.

    Every row of ``pdDataFrame`` describes one utterance. Its pre-computed
    vectors are read from ``BERT/vector/<dbName>_<id>_<diaglogID>.npy`` and
    concatenated (oldest first) with the vectors of up to two preceding
    utterances of the same dialog, then zero-padded to ``dim*3`` time steps.

    Args:
        pdDataFrame: frame whose first ``n_classes`` columns hold the labels and
            which provides the ``utterance_pos``, ``id`` and ``diaglogID`` columns.
        dbName: file-name prefix of the vector files (e.g. ``'Train'``).
        labels: label names; stored on the instance but not used by the generator.
        batch_size: number of rows per batch.
        dim: time steps reserved per utterance (``X`` has ``dim*3`` steps).
        n_channels: feature size of every time step.
        n_classes: number of label columns / output arrays.
        shuffle: reshuffle the row order at the end of every epoch.
    """

    # Number of rows of the placeholder used when a vector file cannot be read
    # (presumably the encoder's maximum sequence length — TODO confirm).
    _FALLBACK_LEN = 512

    def __init__(self, pdDataFrame, dbName, labels=['oQ', 'RQ', 'CQ', 'FD', 'FQ', 'IR', 'PA', 'PF', 'NF', 'GG', 'JK', 'O'],\
                 batch_size=BATCH_SIZE, dim=MAX_LEN, n_channels=N_CHANNELS,\
                 n_classes=N_CLASS, shuffle=True):
        """Store the configuration and build the initial row order."""
        self.dim = dim
        self.batch_size = batch_size
        # Copy so that instances never share the default list object.
        self.labels = list(labels)
        self.list_IDs = pdDataFrame
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.dbName=dbName
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``([X, Pos], Y)``."""
        # Positional row numbers belonging to this batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Re-index the slice to 0..n-1 so rows can be addressed with .loc[i, ...].
        list_IDs_temp = self.list_IDs.iloc[indexes,:].reset_index(drop=True)

        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Rebuild the row order; shuffled with NumPy's global RNG when enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _load_vector(self, utterance_id, dialog_id, fill_value):
        """Load one utterance's vectors, or a constant placeholder if unreadable."""
        path = 'BERT/vector/'+self.dbName+'_'+str(utterance_id)+'_'+str(dialog_id)+'.npy'
        try:
            return np.load(path)
        except (OSError, ValueError, EOFError):
            # Best-effort fallback for missing or unreadable files.
            return np.full((self._FALLBACK_LEN, self.n_channels), fill_value, dtype=np.float64)

    def _context_vectors(self, pos, utterance_id, dialog_id):
        """Concatenate the vectors of the utterance and its (up to two) predecessors.

        Raises:
            ValueError: if ``pos`` is not a position >= 1 (previously such rows
                silently reused the vectors of the preceding row, or failed
                with an unbound-variable error on the first row).
        """
        # (utterance id, placeholder fill) pairs, oldest utterance first.
        # NOTE(review): the placeholder for the *current* utterance is zeros for
        # pos 1 and 2 but ones for pos >= 3. Kept as-is so features match
        # existing checkpoints — confirm whether this asymmetry is intended.
        if pos == 1:
            plan = [(utterance_id, 0.0)]
        elif pos == 2:
            plan = [(utterance_id-1, 1.0), (utterance_id, 0.0)]
        elif pos >= 3:
            plan = [(utterance_id-2, 1.0), (utterance_id-1, 1.0), (utterance_id, 1.0)]
        else:
            raise ValueError('utterance_pos must be >= 1, got %r' % (pos,))

        parts = [self._load_vector(uid, dialog_id, fill) for uid, fill in plan]
        return np.concatenate(parts, axis=0)

    def __data_generation(self, list_IDs_temp):
        """Build the arrays for one batch.

        Returns:
            ``([X, Pos], Y)`` where ``X`` is ``(n, dim*3, n_channels)``, ``Pos``
            is the ``utterance_pos`` column as an array of length ``n`` and
            ``Y`` is a list of ``n_classes`` arrays of length ``n``; ``n`` is
            the number of rows in ``list_IDs_temp``.
        """
        # Size the arrays by the rows actually present so X, Y and Pos always agree.
        n_rows = len(list_IDs_temp)
        X = np.zeros((n_rows, self.dim*3, self.n_channels))
        y = np.zeros((n_rows, self.n_classes), dtype=int)

        for i in range(n_rows):
            pos=list_IDs_temp.loc[i,'utterance_pos']
            utterenceID=list_IDs_temp.loc[i,'id']
            diaglogID=list_IDs_temp.loc[i,'diaglogID']

            temp = self._context_vectors(pos, utterenceID, diaglogID)

            # Left-aligned; the remaining time steps stay zero (padding).
            X[i,0:temp.shape[0],:] = temp

            # Labels are taken positionally from the first n_classes columns.
            y[i,:] = np.array(list_IDs_temp.iloc[i,0:self.n_classes])

        # One target vector per output head.
        Y = [y[:,k] for k in range(self.n_classes)]
        Pos=np.array(list_IDs_temp.loc[:,'utterance_pos'])
        return [X,Pos], Y

In [0]:
# Batch generators for training and validation; the second argument is the
# file-name prefix of the vector files each generator reads.
training_generator = DataGenerator(Train,'Train')
validation_generator = DataGenerator(Valid,'Valid')

In [0]:
# Per-head loss weights derived from the positive rate r of each label column:
# negatives (key 0) are weighted 1 + r and positives (key 1) are weighted 2 - r,
# so the rarer a label is, the more its positive examples count.
# This is the closed form of 1 + 1/((1-r)/r + 1) and 1 + ((1-r)/r)/((1-r)/r + 1);
# unlike those expressions it does not divide by r, so a label with no positive
# example gets the weights {0: 1, 1: 2} instead of a NaN.
classWeight_Dict={}
for i in range(N_CLASS):
  ratioTrue=np.sum(Train.iloc[:,i])/len(Train)
  classWeight_Dict['output'+str(i+1)]={0:1+ratioTrue,1:2-ratioTrue}

In [0]:
# Add 1 to both weights of the 'output4' head, increasing that head's share of the loss.
# NOTE(review): this mutates classWeight_Dict in place, so re-running this cell
# without first re-running the cell that builds the dictionary adds another +1.
classWeight_Dict['output4'][1]+=1
classWeight_Dict['output4'][0]+=1

In [25]:
# Display the final per-head class weights.
classWeight_Dict

{'output1': {0: 1.2345571818407344, 1: 1.7654428181592656},
 'output10': {0: 1.0336144877201687, 1: 1.9663855122798313},
 'output11': {0: 1.0100471347060282, 1: 1.9899528652939718},
 'output12': {0: 1.0018605805011163, 1: 1.9981394194988837},
 'output2': {0: 1.0604068469362442, 1: 1.9395931530637558},
 'output3': {0: 1.074423220044654, 1: 1.925576779955346},
 'output4': {0: 2.247085090548251, 1: 2.752914909451749},
 'output5': {0: 1.0875713222525427, 1: 1.912428677747457},
 'output6': {0: 1.1071694368643017, 1: 1.8928305631356983},
 'output7': {0: 1.3979161498387498, 1: 1.6020838501612502},
 'output8': {0: 1.1070453981642272, 1: 1.8929546018357728},
 'output9': {0: 1.076779955346068, 1: 1.923220044653932}}

## Build the model


In [0]:
def binary_crossentropy(y_true, y_pred, weights,from_logits=False,label_smoothing=0):
    """Binary cross-entropy in which each element is scaled by a class weight.

    Args:
        y_true: ground-truth labels; cast to the dtype of ``y_pred``.
        y_pred: predictions (probabilities, or logits when ``from_logits``).
        weights: mapping indexed with ``0`` and ``1`` giving the factor applied
            to the negative and to the positive part of the target.
        from_logits: forwarded to ``K.binary_crossentropy``.
        label_smoothing: when non-zero, targets are squeezed towards 0.5 before
            both the loss and the weighting mask are computed.

    Returns:
        The weighted loss averaged over the last axis.
    """
    float_type = K.floatx()

    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=float_type)

    # Smooth the targets only when label_smoothing is non-zero.
    y_true = smart_cond.smart_cond(
        label_smoothing,
        lambda: y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing,
        lambda: y_true)

    # Element-wise weight: weights[0] * (1 - y_true) + weights[1] * y_true.
    one = tf.constant(1.0, dtype=float_type)
    negative_part = tf.math.scalar_mul(
        tf.constant(weights[0], dtype=float_type), tf.subtract(one, y_true))
    positive_part = tf.math.scalar_mul(
        tf.constant(weights[1], dtype=float_type), y_true)
    element_weight = tf.math.add(negative_part, positive_part)

    element_loss = K.binary_crossentropy(y_true, y_pred, from_logits=from_logits)
    return K.mean(tf.math.multiply(element_loss, element_weight), axis=-1)

In [0]:
def build_model():
    """Build and compile the context BiLSTM with one sigmoid head per class.

    The sequence input (``MAX_LEN*3`` steps of ``N_CHANNELS`` features) passes
    through spatial dropout and two bidirectional LSTM layers. Its max- and
    average-pooled outputs are concatenated with the scalar second input and
    refined by two residual dense blocks. Each of the ``N_CLASS`` heads is
    named ``output<k>`` and trained with ``binary_crossentropy`` bound to the
    matching entry of the module-level ``classWeight_Dict``.

    Returns:
        The compiled ``Model`` (Adam optimizer, binary accuracy metric).
    """
    sequence_input = Input(shape=(MAX_LEN*3,N_CHANNELS))
    position_input = Input(shape=(1,))

    encoded = SpatialDropout1D(0.2)(sequence_input)
    for _ in range(2):
        encoded = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True))(encoded)

    pooled_max = GlobalMaxPooling1D()(encoded)
    pooled_avg = GlobalAveragePooling1D()(encoded)
    features = concatenate([pooled_max, pooled_avg, position_input])

    # Two residual blocks; the +1 accounts for the extra scalar input.
    for _ in range(2):
        residual = Dense(DENSE_HIDDEN_UNITS+1, activation='relu')(features)
        features = add([features, residual])

    head_names = ['output'+str(k+1) for k in range(N_CLASS)]
    heads = [Dense(1, activation='sigmoid',name=head)(features) for head in head_names]

    # One weighted loss per head; the partial needs a __name__ of its own.
    losses = {}
    for k, head in enumerate(head_names):
        weighted_loss = partial(binary_crossentropy, weights=classWeight_Dict[head])
        weighted_loss.__name__ = 'loss'+str(k+1)
        losses[head] = weighted_loss

    model = Model(inputs={'inputs1':sequence_input,'inputs2':position_input}, outputs=heads)
    model.compile(loss=losses, optimizer='adam',metrics=[tf.keras.metrics.binary_accuracy])

    return model

In [28]:
# Build and compile the network, then print its layer table.
model = build_model()
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 3777, 768)]  0                                            
__________________________________________________________________________________________________
spatial_dropout1d_1 (SpatialDro (None, 3777, 768)    0           input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 3777, 128)    426496      spatial_dropout1d_1[0][0]        
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 3777, 128)    98816       bidirectional_2[0][0]            
____________________________________________________________________________________________

In [0]:
# Multiply the learning rate by 0.3 after 1 epoch without improvement, never going below 1e-5.
callback=ReduceLROnPlateau(patience=1,min_lr=0.00001,factor=0.3)
# Checkpoint path, relative to the working directory.
Name='BERT/BiLSTM_Context_addPositionEncoding.h5'
# Keep only the best model seen so far (save_best_only=True) and log every save.
checkpointer = ModelCheckpoint(filepath=Name, verbose=1, save_best_only=True)

In [30]:
# Train on the Sequence generators. Model.fit accepts a keras.utils.Sequence
# directly; fit_generator is deprecated in favour of it. The returned History
# object is the cell's displayed value, as before.
model.fit(
    x=training_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[checkpointer,callback])

Epoch 1/15
Epoch 00001: val_loss improved from inf to 4.34725, saving model to BERT/BiLSTM_Context_addPositionEncoding.h5
Epoch 2/15
Epoch 00002: val_loss improved from 4.34725 to 3.96341, saving model to BERT/BiLSTM_Context_addPositionEncoding.h5
Epoch 3/15
Epoch 00003: val_loss improved from 3.96341 to 3.77886, saving model to BERT/BiLSTM_Context_addPositionEncoding.h5
Epoch 4/15
Epoch 00004: val_loss did not improve from 3.77886
Epoch 5/15
Epoch 00005: val_loss improved from 3.77886 to 3.57019, saving model to BERT/BiLSTM_Context_addPositionEncoding.h5
Epoch 6/15
Epoch 00006: val_loss improved from 3.57019 to 3.53784, saving model to BERT/BiLSTM_Context_addPositionEncoding.h5
Epoch 7/15
Epoch 00007: val_loss did not improve from 3.53784
Epoch 8/15
Epoch 00008: val_loss did not improve from 3.53784
Epoch 9/15
Epoch 00009: val_loss did not improve from 3.53784
Epoch 10/15
Epoch 00010: val_loss did not improve from 3.53784
Epoch 11/15
Epoch 00011: val_loss did not improve from 3.53784


<tensorflow.python.keras.callbacks.History at 0x7f50bb176160>

## Prediction




In [31]:
# Print the layer table again for reference before predicting.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 3777, 768)]  0                                            
__________________________________________________________________________________________________
spatial_dropout1d_1 (SpatialDro (None, 3777, 768)    0           input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 3777, 128)    426496      spatial_dropout1d_1[0][0]        
__________________________________________________________________________________________________
bidirectional_3 (Bidirectional) (None, 3777, 128)    98816       bidirectional_2[0][0]            
____________________________________________________________________________________________

In [0]:
# Restore the weights from the checkpoint file written during training
# (the best epoch only, since the checkpoint callback uses save_best_only=True).
model.load_weights(Name)

In [0]:
class testDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``[X, Pos]`` inference batches (no labels).

    Every row of ``pdDataFrame`` describes one utterance. Its pre-computed
    vectors are read from ``BERT/vector/<dbName>_<id>_<diaglogID>.npy`` and
    concatenated (oldest first) with the vectors of up to two preceding
    utterances of the same dialog, then zero-padded to ``dim*3`` time steps.

    Args:
        pdDataFrame: frame providing the ``utterance_pos``, ``id`` and
            ``diaglogID`` columns.
        dbName: file-name prefix of the vector files (e.g. ``'Test'``).
        labels: label names; stored on the instance but not used by the generator.
        batch_size: number of rows per batch.
        dim: time steps reserved per utterance (``X`` has ``dim*3`` steps).
        n_channels: feature size of every time step.
        n_classes: stored on the instance; not used when generating batches.
        shuffle: reshuffle the row order at the end of every epoch (off by
            default so predictions stay aligned with the frame's row order).
    """

    # Number of rows of the placeholder used when a vector file cannot be read
    # (presumably the encoder's maximum sequence length — TODO confirm).
    _FALLBACK_LEN = 512

    def __init__(self, pdDataFrame, dbName, labels=['oQ', 'RQ', 'CQ', 'FD', 'FQ', 'IR', 'PA', 'PF', 'NF', 'GG', 'JK', 'O'],\
                 batch_size=25, dim=MAX_LEN, n_channels=N_CHANNELS,\
                 n_classes=N_CLASS, shuffle=False):
        """Store the configuration and build the initial row order."""
        self.dim = dim
        self.batch_size = batch_size
        # Copy so that instances never share the default list object.
        self.labels = list(labels)
        self.list_IDs = pdDataFrame
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.dbName=dbName
        self.on_epoch_end()

    def __len__(self):
        """Number of batches needed to cover every row.

        Rounded up: flooring would silently skip the trailing rows whenever
        the row count is not a multiple of ``batch_size``, leaving fewer
        predictions than ground-truth rows.
        """
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``[X, Pos]``."""
        # Positional row numbers belonging to this batch (the last batch may be shorter).
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Re-index the slice to 0..n-1 so rows can be addressed with .loc[i, ...].
        list_IDs_temp = self.list_IDs.iloc[indexes,:].reset_index(drop=True)

        X = self.__data_generation(list_IDs_temp)

        return X

    def on_epoch_end(self):
        """Rebuild the row order; shuffled with NumPy's global RNG when enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _load_vector(self, utterance_id, dialog_id, fill_value):
        """Load one utterance's vectors, or a constant placeholder if unreadable."""
        path = 'BERT/vector/'+self.dbName+'_'+str(utterance_id)+'_'+str(dialog_id)+'.npy'
        try:
            return np.load(path)
        except (OSError, ValueError, EOFError):
            # Best-effort fallback for missing or unreadable files.
            return np.full((self._FALLBACK_LEN, self.n_channels), fill_value, dtype=np.float64)

    def _context_vectors(self, pos, utterance_id, dialog_id):
        """Concatenate the vectors of the utterance and its (up to two) predecessors.

        Raises:
            ValueError: if ``pos`` is not a position >= 1 (previously such rows
                silently reused the vectors of the preceding row, or failed
                with an unbound-variable error on the first row).
        """
        # (utterance id, placeholder fill) pairs, oldest utterance first.
        # NOTE(review): the placeholder for the *current* utterance is zeros for
        # pos 1 and 2 but ones for pos >= 3. Kept as-is so features match
        # existing checkpoints — confirm whether this asymmetry is intended.
        if pos == 1:
            plan = [(utterance_id, 0.0)]
        elif pos == 2:
            plan = [(utterance_id-1, 1.0), (utterance_id, 0.0)]
        elif pos >= 3:
            plan = [(utterance_id-2, 1.0), (utterance_id-1, 1.0), (utterance_id, 1.0)]
        else:
            raise ValueError('utterance_pos must be >= 1, got %r' % (pos,))

        parts = [self._load_vector(uid, dialog_id, fill) for uid, fill in plan]
        return np.concatenate(parts, axis=0)

    def __data_generation(self, list_IDs_temp):
        """Build ``[X, Pos]`` for one batch.

        ``X`` is ``(n, dim*3, n_channels)`` and ``Pos`` is the ``utterance_pos``
        column as an array of length ``n``, where ``n`` is the number of rows
        in ``list_IDs_temp``.
        """
        # Size X by the rows actually present so X and Pos agree on a short last batch.
        n_rows = len(list_IDs_temp)
        X = np.zeros((n_rows, self.dim*3, self.n_channels))

        for i in range(n_rows):
            pos=list_IDs_temp.loc[i,'utterance_pos']
            utterenceID=list_IDs_temp.loc[i,'id']
            diaglogID=list_IDs_temp.loc[i,'diaglogID']

            temp = self._context_vectors(pos, utterenceID, diaglogID)

            # Left-aligned; the remaining time steps stay zero (padding).
            X[i,0:temp.shape[0],:] = temp

        Pos=np.array(list_IDs_temp.loc[:,'utterance_pos'])
        return [X,Pos]

In [0]:
# Unshuffled inference generator over the test split; reads the 'Test_*' vector files.
test_generator=testDataGenerator(Test,'Test')

In [35]:
# Model.predict accepts a keras.utils.Sequence directly; predict_generator is
# deprecated in favour of it.
prediction = model.predict(test_generator)

Instructions for updating:
Please use Model.predict, which supports generators.


In [0]:
# Convert the model output into one array; the metric functions below index it
# as [head, sample, 0].
Prediction=np.array(prediction)

In [0]:
# Ground truth: the first N_CLASS columns of Test, one row per utterance.
y_true=np.array(Test.iloc[:,0:N_CLASS])
# Predicted scores, thresholded at 0.5 inside the metric functions.
y_pred=Prediction

In [0]:
def hamming_score(y_true, y_pred, toggle_output=False, id2label=None):
    '''
    Compute the Hamming score (a.k.a. label-based accuracy) for the multi-label case
    https://stackoverflow.com/q/32239577/395857

    For every sample the score is |true ∩ predicted| / |true ∪ predicted|
    (1 when both sets are empty); the mean over all samples is returned.

    Args:
        y_true: array indexed as [sample, label]; non-zero entries are the true labels.
        y_pred: array indexed as [label, sample, 0]; a label counts as predicted
            when its score is >= 0.5. The number of samples is taken from its
            second axis.
        toggle_output: print the true and predicted label sets of every sample.
        id2label: optional mapping from label index to a display name, used only
            when ``toggle_output`` is set. Without it the raw indices are printed
            (the function previously referenced an undefined global and raised
            NameError).

    Returns:
        The mean per-sample score.
    '''
    def _names(indices):
        # Stable, readable order for the debug print.
        ordered = sorted(int(index) for index in indices)
        if id2label is None:
            return ordered
        return [id2label[index] for index in ordered]

    acc_list = []
    for i in range(y_pred.shape[1]):
        set_true = set( np.where(y_true[i,:])[0])
        set_pred = set( np.where(y_pred[:,i,0]>=0.5)[0])
        if toggle_output:
            print('set_true: {0}'.format(_names(set_true)), 'set_pred: {0}'.format(_names(set_pred)))
        if len(set_true) == 0 and len(set_pred) == 0:
            # Nothing to predict and nothing predicted counts as a perfect match.
            tmp_a = 1
        else:
            tmp_a = len(set_true.intersection(set_pred))/\
                    float( len(set_true.union(set_pred)) )
        acc_list.append(tmp_a)
    return np.mean(acc_list)

In [39]:
# Mean per-sample overlap between true and predicted label sets on the test split.
hamming_score(y_true, y_pred, toggle_output=False)

0.649063063063063

In [0]:
   
def f1(y_true, y_pred):
    """Micro-averaged precision, recall and F1 over all (sample, label) pairs.

    Args:
        y_true: array indexed as [sample, label]; non-zero entries are the true labels.
        y_pred: array indexed as [label, sample, 0]; a label counts as predicted
            when its score is >= 0.5.

    Returns:
        ``(precision, recall, f1)``; all three are 0 when no prediction is correct.
    """
    hits = 0.
    gold_total = 0.
    predicted_total = 0.

    for row in range(y_true.shape[0]):
        gold = set(np.flatnonzero(y_true[row, :]))
        predicted = set(np.flatnonzero(y_pred[:, row, 0] >= 0.5))

        hits += len(gold.intersection(predicted))
        predicted_total += len(predicted)
        gold_total += len(gold)

    # Without a single correct prediction every ratio is defined as 0.
    if not hits > 0:
        return 0, 0, 0

    precision = hits / predicted_total
    recall = hits / gold_total
    return precision, recall, 2 * precision * recall / (precision + recall)

In [41]:
# (precision, recall, F1) on the test split.
f1(y_true, y_pred)

(0.731239092495637, 0.6235119047619048, 0.6730923694779117)