<a href="https://colab.research.google.com/github/wtsyang/UserIntentPrediction/blob/BERT/BERT/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive inside the Colab VM. On first run this asks for an
# authorization code interactively before the mount completes.
from google.colab import drive # Colab-provided Drive helper

ROOT = "/content/drive"     # mount point for Google Drive
print(ROOT)                 # optional: echo the mount point

drive.mount(ROOT)           # mount Google Drive at ROOT

/content/drive
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Work from the project folder on the mounted Drive; the relative paths used in later cells (data/..., BERT/...) resolve against it.
%cd '/content/drive/My Drive/UserIntentPrediction'

/content/drive/My Drive/UserIntentPrediction


In [0]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, SpatialDropout1D, add, concatenate
from tensorflow.keras.layers  import LSTM, Bidirectional, GlobalMaxPooling1D, GlobalAveragePooling1D
from tensorflow.keras.preprocessing import text, sequence
from gensim.models import KeyedVectors
from sklearn.model_selection  import train_test_split
import pickle
import sklearn
from tensorflow.keras.utils import multi_gpu_model
from tensorflow.keras.callbacks import ModelCheckpoint
print('Tensorflow Version:',tf.__version__)

Tensorflow Version: 2.2.0-rc2


In [0]:
# Hyperparameters and input dimensions shared by the cells below.
NUM_MODELS = 1  # NOTE(review): not referenced by any cell visible here
BATCH_SIZE = 32  # default batch size of DataGenerator (training / validation)
LSTM_UNITS = 256  # units per direction in each bidirectional LSTM layer
# Max- and average-pooling over a bidirectional LSTM output are concatenated in
# build_model, giving 2 * (2 * LSTM_UNITS) features; the residual Dense layers
# must have exactly this width for the `add` to be valid.
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS
EPOCHS = 16  # number of training epochs
MAX_LEN = 1259  # time steps per sample; shorter inputs are zero-padded by the generators
N_CHANNELS=768  # features per time step (presumably the BERT hidden size -- TODO confirm)
N_CLASS=12  # number of output labels; matches the 12 label names listed in the generators

## Loading the dataset


In [0]:
# Read the three preprocessed splits and renumber their rows from zero so that
# positional and label-based indexing agree.
Train, Valid, Test = (
    pd.read_csv(csv_path).reset_index(drop=True)
    for csv_path in (
        'data/Train_Preprocessing.csv',
        'data/Valid_Preprocessing.csv',
        'data/Test_Preprocessing.csv',
    )
)

In [0]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``(X, y)`` batches for training/validation.

    Each sample's features are loaded from
    ``BERT/vector/<dbName>_<id>_<diaglogID>.npy`` and zero-padded along the
    first axis up to ``dim`` rows. Labels are taken from the first
    ``n_classes`` columns of the data frame.

    Args:
        pdDataFrame: DataFrame with one row per sample. Its first
            ``n_classes`` columns are used as the label vector, and it must
            contain the columns ``'id'`` and ``'diaglogID'``.
        dbName: Split name used as the prefix of the ``.npy`` file names.
        labels: Label names; stored on the instance but not used for lookup.
        batch_size: Number of samples per batch.
        dim: Number of rows (time steps) of every sample after padding.
        n_channels: Number of features per time step.
        n_classes: Number of label columns.
        shuffle: If true, the sample order is reshuffled on every epoch end.
    """
    def __init__(self, pdDataFrame, dbName, labels=['oQ', 'RQ', 'CQ', 'FD', 'FQ', 'IR', 'PA', 'PF', 'NF', 'GG', 'JK', 'O'],\
                 batch_size=BATCH_SIZE, dim=MAX_LEN, n_channels=N_CHANNELS,\
                 n_classes=N_CLASS, shuffle=True):
        """Store the configuration and build the initial sample order."""
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        # Copy so instances never share (or mutate) the default list object.
        self.labels = list(labels)
        self.list_IDs = pdDataFrame
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.dbName = dbName
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        The division is floored, so a trailing partial batch is dropped.
        """
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays of batch number ``index``."""
        # Positions (in the current epoch order) of the rows in this batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Renumber the selected rows 0..k-1 so they line up with the rows of X/y.
        list_IDs_temp = self.list_IDs.iloc[indexes, :].reset_index(drop=True)

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load features and labels for the rows of ``list_IDs_temp``.

        Returns:
            ``X`` with shape ``(batch_size, dim, n_channels)`` and ``y`` with
            shape ``(batch_size, n_classes)``. A sample whose vector file
            cannot be loaded keeps an all-zero ``X`` row (best effort); the
            failure is reported on stdout.
        """
        X = np.zeros((self.batch_size, self.dim, self.n_channels))
        y = np.zeros((self.batch_size, self.n_classes), dtype=int)

        for i in range(len(list_IDs_temp)):
            utterenceID = list_IDs_temp.loc[i, 'id']
            diaglogID = list_IDs_temp.loc[i, 'diaglogID']
            vector_path = 'BERT/vector/'+self.dbName+'_'+str(utterenceID)+'_'+str(diaglogID)+'.npy'
            try:
                temp = np.load(vector_path)
                # Copy into the top rows; the remaining rows stay zero (padding).
                X[i, 0:temp.shape[0], :] = temp
            except (OSError, ValueError, EOFError) as err:
                # Missing/unreadable file or a shape that does not fit: keep
                # going with a zero sample, but do not hide interrupts or
                # programming errors the way a bare ``except`` would.
                print('Failed to load the data: '+vector_path+' ('+repr(err)+')')
            # Labels live in the leading n_classes columns of the frame.
            y[i, :] = np.array(list_IDs_temp.iloc[i, 0:self.n_classes])

        return X, y

In [0]:
# Training batches are reshuffled every epoch (DataGenerator's default).
training_generator = DataGenerator(Train,'Train')
# Validation order is kept fixed: DataGenerator drops the trailing partial
# batch, so shuffling would evaluate a different subset of rows each epoch and
# make the val_loss values compared across epochs inconsistent.
validation_generator = DataGenerator(Valid,'Valid', shuffle=False)

## Build the model


In [0]:
def build_model():
    """Build and compile the stacked-BiLSTM multi-label classifier.

    Architecture: spatial dropout -> two bidirectional LSTM layers ->
    concatenated global max/average pooling -> two residual Dense blocks ->
    sigmoid output with ``N_CLASS`` units.

    Returns:
        A compiled ``Model`` taking inputs of shape ``(MAX_LEN, N_CHANNELS)``.
    """
    sequence_in = Input(shape=(MAX_LEN, N_CHANNELS))
    features = SpatialDropout1D(0.2)(sequence_in)
    for _ in range(2):
        features = Bidirectional(LSTM(LSTM_UNITS, return_sequences=True))(features)

    # Collapse the time axis two ways and stack the results side by side.
    pooled = concatenate([
        GlobalMaxPooling1D()(features),
        GlobalAveragePooling1D()(features),
    ])
    # Two residual blocks: each adds a Dense transform of the vector to itself.
    for _ in range(2):
        pooled = add([pooled, Dense(DENSE_HIDDEN_UNITS, activation='relu')(pooled)])

    outputs = Dense(N_CLASS, activation='sigmoid')(pooled)

    classifier = Model(inputs=sequence_in, outputs=outputs)
    # NOTE(review): MeanIoU is documented for integer class ids; confirm it
    # reports something meaningful on un-thresholded sigmoid outputs.
    classifier.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=[
            tf.keras.metrics.MeanIoU(num_classes=N_CLASS),
            tf.keras.metrics.binary_accuracy,
        ],
    )

    return classifier

In [0]:
# Instantiate the network and print its layer table to check output shapes and parameter counts.
model = build_model()
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 1259, 768)]  0                                            
__________________________________________________________________________________________________
spatial_dropout1d_2 (SpatialDro (None, 1259, 768)    0           input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_4 (Bidirectional) (None, 1259, 512)    2099200     spatial_dropout1d_2[0][0]        
__________________________________________________________________________________________________
bidirectional_5 (Bidirectional) (None, 1259, 512)    1574912     bidirectional_4[0][0]            
____________________________________________________________________________________________

In [0]:
# Write the model to BERT/LSTM.h5 only when the monitored validation loss improves.
checkpointer = ModelCheckpoint(filepath='BERT/LSTM.h5', verbose=1, save_best_only=True)
# Model.fit accepts keras.utils.Sequence objects directly; fit_generator is
# deprecated in TF 2.x and removed in later releases.
model.fit(
    x=training_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[checkpointer])

ERROR! Session/line number was not unique in database. History logging moved to new session 59
Epoch 1/16
Epoch 00001: val_loss improved from inf to 0.20306, saving model to BERT/LSTM.h5
Epoch 2/16
Epoch 00002: val_loss improved from 0.20306 to 0.18470, saving model to BERT/LSTM.h5
Epoch 3/16
Epoch 00003: val_loss improved from 0.18470 to 0.17644, saving model to BERT/LSTM.h5
Epoch 4/16
Epoch 00004: val_loss improved from 0.17644 to 0.17212, saving model to BERT/LSTM.h5
Epoch 5/16
Epoch 00005: val_loss improved from 0.17212 to 0.17055, saving model to BERT/LSTM.h5
Epoch 6/16
Epoch 00006: val_loss did not improve from 0.17055
Epoch 7/16
Epoch 00007: val_loss did not improve from 0.17055
Epoch 8/16
Epoch 00008: val_loss did not improve from 0.17055
Epoch 9/16
Epoch 00009: val_loss did not improve from 0.17055
Epoch 10/16
Epoch 00010: val_loss did not improve from 0.17055
Epoch 11/16
Epoch 00011: val_loss did not improve from 0.17055
Epoch 12/16
Epoch 00012: val_loss did not improve from 

<tensorflow.python.keras.callbacks.History at 0x7f1200524470>

## Prediction




In [0]:
class testDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields feature-only batches for prediction.

    Each sample's features are loaded from
    ``BERT/vector/<dbName>_<id>_<diaglogID>.npy`` and zero-padded along the
    first axis up to ``dim`` rows. Every row of the data frame is served
    exactly once per pass, so the number of predictions equals the number of
    rows even when it is not a multiple of ``batch_size``.

    Args:
        pdDataFrame: DataFrame with one row per sample; must contain the
            columns ``'id'`` and ``'diaglogID'``.
        dbName: Split name used as the prefix of the ``.npy`` file names.
        labels: Label names; stored on the instance but not used here.
        batch_size: Maximum number of samples per batch.
        dim: Number of rows (time steps) of every sample after padding.
        n_channels: Number of features per time step.
        n_classes: Number of labels; stored on the instance but not used here.
        shuffle: If true, the sample order is reshuffled on every epoch end.
            Leave it false when predictions must line up with the frame rows.
    """
    def __init__(self, pdDataFrame, dbName, labels=['oQ', 'RQ', 'CQ', 'FD', 'FQ', 'IR', 'PA', 'PF', 'NF', 'GG', 'JK', 'O'],\
                 batch_size=25, dim=MAX_LEN, n_channels=N_CHANNELS,\
                 n_classes=N_CLASS, shuffle=False):
        """Store the configuration and build the initial sample order."""
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        # Copy so instances never share (or mutate) the default list object.
        self.labels = list(labels)
        self.list_IDs = pdDataFrame
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.dbName = dbName
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceil (not floor): flooring would silently drop the last rows and
        # leave the prediction array shorter than the data frame.
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return the feature array ``X`` of batch number ``index``."""
        # Positions of the rows in this batch; the last slice may be shorter.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Renumber the selected rows 0..k-1 so they line up with the rows of X.
        list_IDs_temp = self.list_IDs.iloc[indexes, :].reset_index(drop=True)

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Rebuild the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the features for the rows of ``list_IDs_temp``.

        Returns:
            ``X`` with shape ``(len(list_IDs_temp), dim, n_channels)``. A
            sample whose vector file cannot be loaded keeps an all-zero row
            (best effort); the failure is reported on stdout.
        """
        # Size by the actual row count so a partial final batch carries no
        # phantom all-zero samples.
        X = np.zeros((len(list_IDs_temp), self.dim, self.n_channels))

        for i in range(len(list_IDs_temp)):
            utterenceID = list_IDs_temp.loc[i, 'id']
            diaglogID = list_IDs_temp.loc[i, 'diaglogID']
            vector_path = 'BERT/vector/'+self.dbName+'_'+str(utterenceID)+'_'+str(diaglogID)+'.npy'
            try:
                temp = np.load(vector_path)
                # Copy into the top rows; the remaining rows stay zero (padding).
                X[i, 0:temp.shape[0], :] = temp
            except (OSError, ValueError, EOFError) as err:
                # Missing/unreadable file or a shape that does not fit: keep
                # going with a zero sample, but do not hide interrupts or
                # programming errors the way a bare ``except`` would.
                print('Failed to load the data: '+vector_path+' ('+repr(err)+')')

        return X

In [0]:
# testDataGenerator defaults to shuffle=False and yields features only, so predictions follow the row order of Test.
test_generator=testDataGenerator(Test,'Test')

In [0]:
# Model.predict accepts keras.utils.Sequence objects directly; predict_generator
# is deprecated in TF 2.x and removed in later releases.
prediction = model.predict(test_generator)


In [0]:
# Mean per-sample IoU (Jaccard index) between the thresholded predictions and
# the ground-truth labels stored in the leading columns of Test.
if len(prediction) != len(Test):
    raise ValueError(
        'prediction has %d rows but Test has %d; they must align row by row'
        % (len(prediction), len(Test)))

pred_mask = np.asarray(prediction) >= 0.5
true_mask = Test.iloc[:, 0:pred_mask.shape[1]].to_numpy() == 1

intersection = np.sum(pred_mask & true_mask, axis=1)
union = np.sum(pred_mask | true_mask, axis=1)
# A row with an empty union (nothing predicted, nothing labelled) is a perfect
# match; scoring it 1.0 avoids a 0/0 NaN that would poison the whole mean.
per_sample_iou = np.where(union > 0, intersection / np.maximum(union, 1), 1.0)

IOU = per_sample_iou.mean()
print(IOU)

0.5801081081081079


In [0]:
# Fraction of individual label decisions that are correct, averaged over all
# samples and labels. Predictions are thresholded at 0.5 first: comparing raw
# sigmoid probabilities to 0/1 labels with == would almost never match.
# Stored under its own name so the IoU computed earlier is not overwritten.
if len(prediction) != len(Test):
    raise ValueError(
        'prediction has %d rows but Test has %d; they must align row by row'
        % (len(prediction), len(Test)))

pred_mask = np.asarray(prediction) >= 0.5
true_mask = Test.iloc[:, 0:pred_mask.shape[1]].to_numpy() == 1

label_accuracy = np.mean(pred_mask == true_mask)
print(label_accuracy)