# Imports

In [None]:

# Mount Google Drive so the saved models under "My Drive" are reachable from this Colab runtime.
# force_remount=True is passed so that re-running this cell mounts the drive again.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)


Mounted at /content/drive


In [None]:

!pip install transformers
from transformers import DistilBertTokenizerFast 
import re
import numpy as np
import tensorflow as tf
from tensorflow_addons.text import  viterbi_decode
import pickle


# DistilBERT model

In [None]:

# Intent labels; bert_model_out indexes this array with the argmax of the classifier output.
# NOTE(review): the order presumably matches the label encoding used at training time - confirm.
unique_intents = np.array([
    'analyze_transactions',
    'balances',
    'bank_related',
    'chatbot_related',
    'check_credit_card_details',
    'check_loan_details',
    'greetings',
    'transactions_intent',
])

# NER lookup table : class index -> tag string
id2tag = dict(enumerate(['o', 's-id', 'e-id', 's-product', 'e-product', 'na']))

# pretrained wordpiece tokenizer (downloads the vocabulary on first use)
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# fine-tuned joint intent + NER model saved on the mounted drive
bert_model = tf.keras.models.load_model('/content/drive/My Drive/AAI/models/bert_model.h5')
bert_model.summary()


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 50)]         0                                            
__________________________________________________________________________________________________
attention_mask (InputLayer)     [(None, 50)]         0                                            
__________________________________________________________________________________________________
distilbert (TFDistilBertMainLay ((None, 50, 768),)   66362880    input_ids[0][0]                  
                                                                 attention_mask[0][0]             
__________________________________________________________________________________________________
tf_op_layer_slice1 (TensorFlowO (None, 1, 768)       0           distilbert[0][0]     

In [None]:

def bert_model_out( text ):

    '''
    This function applies bert model trained on input queries of a banking chatbot to classify the intent and output ner tags at the same time.
    Types of intents : 'analyze_transactions' , 'balances' , 'bank_related' , 'chatbot_related' , 'check_credit_card_details' , 'check_loan_details' , 'greetings','transactions_intent'
    Types of tags : 'o','s-id','e-id','s-product','e-product','na' 

    Inputs to function : a single sentence in str format
    Outputs w.r.t. index :
    0 - intent_predictions : intent label (str) with the highest classifier score
    1 - ner_predictions : list of ner tags (str) , one per word , taken from the first wordpiece of each word
    2 - word_vectors : list of vectors , one per word , each the mean of the model vectors of all wordpieces of that word
    3 - sentence_vector : model vector at the CLS position

    Words that do not fit in the 50 token window are truncated by the tokenizer and get no entry in outputs 1 and 2.
    '''

    ner_predictions = []
    word_vectors = []

    # remove extra spaces from input text and split sentence to words
    # (re.sub returns a new string, so its result has to be kept ; leading / trailing spaces are
    #  trimmed as well, otherwise split(' ') would produce empty "words")
    text = re.sub('  +' , ' ' , text).strip(' ')
    text = [text.split(' ')]

    # pretrained Distilbert wordpiece tokenizer with padding and truncation at specified max_length
    encoding = tokenizer(text , is_pretokenized = True , return_offsets_mapping=True , padding='max_length' , truncation=True , max_length=50)

    # 3 outputs from tokenizer - input ids , attention_mask and offset mapping from wordpiece segmentation
    input_ids = np.array(encoding.input_ids)
    attention_mask = np.array(encoding.attention_mask)
    offset_mapping = encoding.offset_mapping

    # input ids and attention mask are fed to the pretrained model to get classifier and ner probability distributions 
    # along with output vector representation of each input token (CLS , SEP , PAD tokens included)
    cls , sentence , vectors = bert_model([input_ids , attention_mask])

    # vector output corresponding to CLS token is used as sentence vector
    sentence_vector = vectors[0,0,:]

    # argmax to find intent with highest probability
    intent_predictions = unique_intents[tf.math.argmax(cls , axis = 1).numpy()][0] 

    # if we take sentence example - "Pay Ramesh 1000" the tokens are ['[CLS]' , 'pay' , 'ram' , '##es' , '##sh' , '1000' , '[SEP]' , '[PAD]' ...]
    # with pre-split input the offsets are relative to each word , so a wordpiece whose start offset is > 0
    # is a continuation of the word opened by an earlier wordpiece (here '##es' and '##sh')
    is_continuation = np.array([start for start , _ in offset_mapping[0]]) > 0

    # attention mask is 1 for CLS , the wordpieces and SEP ; hence SEP sits at index (number of real tokens - 1)
    sep_index = int(attention_mask[0].sum()) - 1

    # loop on each word ; k is the token index of the first wordpiece of the current word (index 0 is CLS)
    k = 1
    for _ in text[0]:

        # words cut off by truncation have no tokens left - stop instead of indexing past the model output
        if k >= sep_index:
            break

        # extend the span [k , end) over every continuation wordpiece that belongs to this word
        end = k + 1
        while end < sep_index and is_continuation[end]:
            end += 1

        # ner argmax for 1st wordpiece of the word is used for the whole word
        # NOTE(review): the k-1 index is kept from the original code - the ner output appears to be
        # shifted by one relative to the token index (CLS position presumably dropped) ; confirm against the model
        probs = sentence[0 , k-1 , : ]
        ner = int(tf.math.argmax(probs).numpy())
        ner_predictions.append(id2tag[ner])

        # word vector is the mean over all wordpieces of the word (a single wordpiece is returned unchanged)
        word_vector = tf.reduce_mean(vectors[0 , k:end , : ] , axis = 0)
        word_vectors.append(word_vector)

        # next word starts right after the last wordpiece of this one
        k = end

    return intent_predictions , ner_predictions , word_vectors , sentence_vector


In [None]:

# Smoke test of the DistilBERT pipeline on one sample query
bert_result = bert_model_out('Pay Suryakanth Sharma with my credit card')
intent_predictions , ner_predictions , word_vectors , sentence_vector = bert_result

# show the intent , the per-word tags , the shape of every word vector and the sentence vector shape
word_vector_shapes = [vector.shape for vector in word_vectors]
for shown in (intent_predictions , ner_predictions , word_vector_shapes , sentence_vector.shape):
    print(shown ,'\n')




transactions_intent 

['o', 's-id', 'na', 'e-id', 'o', 'o', 's-product'] 

[TensorShape([768]), TensorShape([768]), TensorShape([768]), TensorShape([768]), TensorShape([768]), TensorShape([768]), TensorShape([768])] 

(768,) 



# GRU + CRF model


In [None]:

# Intent labels; gru_model_out indexes this array with the argmax of the classifier output.
# NOTE(review): the order presumably matches the label encoding used at training time - confirm.
unique_intents = np.array([
    'analyze_transactions',
    'balances',
    'bank_related',
    'chatbot_related',
    'check_credit_card_details',
    'check_loan_details',
    'greetings',
    'transactions_intent',
])

# NER lookup table : class index -> tag string
id2tag = dict(enumerate(['o', 's-id', 'e-id', 's-product', 'e-product', 'na']))

# CRF transition parameters consumed by viterbi_decode in gru_model_out
# NOTE(review): pickle.load can execute arbitrary code - only load this file from a trusted source.
with open('/content/drive/My Drive/AAI/models/crf_transition_params' , 'rb') as params_file:
    transition_params = pickle.load(params_file)

# pretrained wordpiece tokenizer (same vocabulary as used for the DistilBERT model)
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# GRU model saved on the mounted drive
gru_model = tf.keras.models.load_model('/content/drive/My Drive/AAI/models/gru_model.h5')
gru_model.summary()


Model: "functional_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 50)]         0                                            
__________________________________________________________________________________________________
Word_Embedding_11 (Embedding)   (None, 50, 300)      9156600     input_ids[0][0]                  
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 50, 300)      406800      Word_Embedding_11[0][0]          
__________________________________________________________________________________________________
reshape_2 (Reshape)             (None, 15000)        0           bidirectional_1[0][0]            
_______________________________________________________________________________________

In [None]:

def gru_model_out( text ):

    '''
    This function applies gru + crf model trained on input queries of a banking chatbot to classify the intent and output ner tags at the same time.
    Types of intents : 'analyze_transactions' , 'balances' , 'bank_related' , 'chatbot_related' , 'check_credit_card_details' , 'check_loan_details' , 'greetings','transactions_intent'
    Types of tags : 'o','s-id','e-id','s-product','e-product','na' 

    Inputs to function : a single sentence in str format
    Outputs w.r.t. index :
    0 - intent_predictions : intent label (str) with the highest classifier score
    1 - ner_predictions : list of ner tags (str) , one per word , taken from the viterbi decoded tag of the first wordpiece of each word
    2 - word_vectors : list of vectors , one per word , each the mean of the model vectors of all wordpieces of that word
    3 - sentence_vector : model vector at the CLS position

    Words that do not fit in the 50 token window are truncated by the tokenizer and get no entry in outputs 1 and 2.
    '''

    ner_predictions = []
    word_vectors = []

    # remove extra spaces from input text and split sentence to words
    # (re.sub returns a new string, so its result has to be kept ; leading / trailing spaces are
    #  trimmed as well, otherwise split(' ') would produce empty "words")
    text = re.sub('  +' , ' ' , text).strip(' ')
    text = [text.split(' ')]

    # pretrained Distilbert wordpiece tokenizer with padding and truncation at specified max_length
    encoding = tokenizer(text , is_pretokenized = True , return_offsets_mapping=True , padding='max_length' , truncation=True , max_length=50)

    # 3 outputs from tokenizer - input ids , attention_mask and offset mapping from wordpiece segmentation
    input_ids = np.array(encoding.input_ids)
    attention_mask = np.array(encoding.attention_mask)
    offset_mapping = encoding.offset_mapping

    # input ids are fed to the pretrained model to get classifier and ner unary scores 
    # along with output vector representation of each input token (CLS , SEP , PAD tokens included)
    cls , sentence , vectors = gru_model(input_ids)

    # vector output corresponding to CLS token is used as sentence vector
    sentence_vector = vectors[0,0,:]

    # argmax to find intent with highest probability
    intent_predictions = unique_intents[tf.math.argmax(cls , axis = 1).numpy()][0] 

    # viterbi_decode is an algorithm which uses unary (from model output) and binary potentials (transition params) 
    # to predict whole ner sequence at once
    target_sentence = viterbi_decode( sentence[0] , transition_params )[0]

    # if we take sentence example - "Pay Ramesh 1000" the tokens are ['[CLS]' , 'pay' , 'ram' , '##es' , '##sh' , '1000' , '[SEP]' , '[PAD]' ...]
    # with pre-split input the offsets are relative to each word , so a wordpiece whose start offset is > 0
    # is a continuation of the word opened by an earlier wordpiece (here '##es' and '##sh')
    is_continuation = np.array([start for start , _ in offset_mapping[0]]) > 0

    # attention mask is 1 for CLS , the wordpieces and SEP ; hence SEP sits at index (number of real tokens - 1)
    sep_index = int(attention_mask[0].sum()) - 1

    # loop on each word ; k is the token index of the first wordpiece of the current word (index 0 is CLS)
    k = 1
    for _ in text[0]:

        # words cut off by truncation have no tokens left - stop instead of indexing past the model output
        if k >= sep_index:
            break

        # extend the span [k , end) over every continuation wordpiece that belongs to this word
        end = k + 1
        while end < sep_index and is_continuation[end]:
            end += 1

        # decoded tag of the 1st wordpiece of the word is used for the whole word
        # NOTE(review): the k-1 index is kept from the original code - the ner output appears to be
        # shifted by one relative to the token index (CLS position presumably dropped) ; confirm against the model
        ner = target_sentence[ k-1 ]
        ner_predictions.append(id2tag[ner])

        # word vector is the mean over all wordpieces of the word (a single wordpiece is returned unchanged)
        word_vector = tf.reduce_mean(vectors[0 , k:end , : ] , axis = 0)
        word_vectors.append(word_vector)

        # next word starts right after the last wordpiece of this one
        k = end

    return intent_predictions , ner_predictions , word_vectors , sentence_vector


In [None]:

# Smoke test of the GRU + CRF pipeline on one sample query
gru_result = gru_model_out('Pay Suryakanth Sharma using credit card')
intent_predictions , ner_predictions , word_vectors , sentence_vector = gru_result

# show the intent , the per-word tags , the shape of every word vector and the sentence vector shape
word_vector_shapes = [vector.shape for vector in word_vectors]
for shown in (intent_predictions , ner_predictions , word_vector_shapes , sentence_vector.shape):
    print(shown ,'\n')


transactions_intent 

['o', 's-id', 'na', 'o', 'o', 's-product'] 

[TensorShape([300]), TensorShape([300]), TensorShape([300]), TensorShape([300]), TensorShape([300]), TensorShape([300])] 

(300,) 



