# Download Needed Files

FILES : 


*   requirements.txt
*   TRAIN_FIX.csv
*   TEST_FIX.csv



In [None]:
# Download the input files with gdown, one command per file ID.
# NOTE(review): which ID corresponds to requirements.txt / TRAIN_FIX.csv / TEST_FIX.csv
# is not visible here, and later cells read "Timeline ML - *_FIX.csv" — confirm the
# downloaded file names match.
!gdown --id 1zBU0xqLUZstpjlG-uxNdTdQmfxVcs2iW
!gdown --id 1D4eLGtIxWmXzvFZ9kBcipR0MvCr6cQrm
!gdown --id 1eXCZf6B_11Yq54ksf4vr-peS_p-lwScr

# Install and Import Library

**INSTALLING REQUIRED LIBRARIES**

*   Flask==2.1.0
*   gunicorn==20.1.0
*   tensorflow==2.6.0
*   tensorflow-text==2.6.0
*   tensorflow-hub==0.12.0 
*   scikit-learn==0.23.2
*   numpy==1.19.5
*   pandas==1.1.3

The pinned versions in requirements.txt can be updated if the script changes.


In [None]:
!pip install -r requirements.txt

Import the libraries

In [10]:
import os
#import shutil
import pandas as pd
from sklearn.preprocessing import LabelBinarizer

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import seaborn as sns

# Preparing Dataset

Load the dataset used to train the model. The dataset is based on the Traveloka Help Center; the source and the files used are listed below.


> Traveloka Help Center 
> https://www.traveloka.com/en-id/help

File :

*   TRAIN_FIX.csv
*   TEST_FIX.csv



In [None]:
# Folder holding the downloaded CSV files (also used when loading the test set).
datafolder="/content/"

trainfile=datafolder+"Timeline ML - TRAIN_FIX.csv"
# Read the training data and discard the 'answer' column; the remaining columns
# are split into model inputs and the 'intent' label in the next cell.
traindf = pd.read_csv(trainfile).drop(columns=['answer'])
# Show a short preview instead of echoing the entire removed column as cell output.
traindf.head()

In [13]:
# Labels are the 'intent' column; features are every other remaining column,
# taken as a NumPy array. traindf itself is left untouched.
trainlabels = traindf["intent"].copy()
trainfeatures = traindf.drop(columns=["intent"]).values

Convert each intent to a binary label vector with `LabelBinarizer()`.

In [14]:
# Fit the label encoder on the training intents, then encode them.
# The fitted `binarizer` is reused later to encode the test labels.
binarizer = LabelBinarizer()
binarizer.fit(trainlabels.values)
trainlabels = binarizer.transform(trainlabels.values)

Prepare a test dataset

In [15]:
# Read the test CSV and discard the 'answer' column.
testfile = datafolder + "Timeline ML - TEST_FIX.csv"
testdf = pd.read_csv(testfile).drop(columns=['answer'])

# Features are every column except 'intent'; labels are encoded with the
# binarizer that was fitted on the training intents.
testlabels = binarizer.transform(testdf["intent"].values)
testfeatures = testdf.drop(columns=["intent"]).values

# Load the model

We use the `small_bert/bert_en_uncased_L-8_H-512_A-8` encoder together with the `bert_en_uncased_preprocess` preprocessing model, both loaded from TensorFlow Hub.

In [16]:
# TF Hub locations of the text preprocessing model and the BERT encoder.
bert_model_name = 'small_bert/bert_en_uncased_L-8_H-512_A-8'
bert_preprocess_link = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2'
bert_model_link = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1'

# Handles used by the rest of the notebook when creating hub.KerasLayer objects.
tfhub_handle_preprocess = bert_preprocess_link
tfhub_handle_encoder = bert_model_link

print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

BERT model selected           : https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1
Preprocess model auto-selected: https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2


# Preprocess and Bert Model

In [17]:
# Wrap the TF Hub preprocessing model as a Keras layer so it can be called on raw text.
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)

In [None]:
# Run the first training row through the preprocessing layer and inspect
# the tensors it produces (first 12 positions of the first example).
text_test = trainfeatures[0]
text_preprocessed = bert_preprocess_model(text_test)

word_ids = text_preprocessed["input_word_ids"]
input_mask = text_preprocessed["input_mask"]
type_ids = text_preprocessed["input_type_ids"]

print(f'Keys       : {list(text_preprocessed.keys())}')
print(f'Shape      : {word_ids.shape}')
print(f'Word Ids   : {word_ids[0, :12]}')
print(f'Input Mask : {input_mask[0, :12]}')
print(f'Type Ids   : {type_ids[0, :12]}')

In [19]:
# Wrap the TF Hub BERT encoder as a Keras layer; the next cell calls it on the
# preprocessed sample to inspect its outputs.
bert_model = hub.KerasLayer(tfhub_handle_encoder)

In [None]:
# Feed the preprocessed sample through the encoder and look at both outputs.
bert_results = bert_model(text_preprocessed)

pooled_output = bert_results["pooled_output"]
sequence_output = bert_results["sequence_output"]

print(f'Loaded BERT: {tfhub_handle_encoder}')
print(f'Pooled Outputs Shape:{pooled_output.shape}')
print(f'Pooled Outputs Values:{pooled_output[0, :12]}')
print(f'Sequence Outputs Shape:{sequence_output.shape}')
print(f'Sequence Outputs Values:{sequence_output[0, :12]}')

# Creating the model for the classifier

In [21]:
def build_classifier_model(num_classes=44):
  """Build the BERT-based intent classifier.

  The model takes raw strings, runs them through the TF Hub preprocessing
  layer and the trainable TF Hub BERT encoder, applies dropout to the
  encoder's pooled output and projects it with a final Dense layer.

  Args:
    num_classes: Number of units in the final Dense layer, i.e. the number of
      logits produced per input. Defaults to 44, the value that was previously
      hard-coded. It should equal the number of columns in the binarized
      labels (for example `len(binarizer.classes_)`).

  Returns:
    A `tf.keras.Model` mapping a batch of strings to raw logits (the final
    layer has no activation, so apply softmax to obtain probabilities).
  """
  text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  # trainable=True so the encoder weights are fine-tuned together with the classifier head.
  encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(0.1)(net)
  # No activation: the loss is expected to be configured to work on logits.
  net = tf.keras.layers.Dense(num_classes, activation=None, name='classifier')(net)
  return tf.keras.Model(text_input, net)

In [22]:
# Instantiate the classifier using the builder defined above.
classifier_model = build_classifier_model()

In [None]:
# Sanity-check the classifier built in the previous cell on the first training
# row. The model is deliberately not rebuilt here: doing so would reload both
# TF Hub layers and discard the existing instance for no benefit.
bert_raw_result = classifier_model(tf.constant(trainfeatures[0]))
# The model returns logits; softmax turns them into per-class probabilities.
print(tf.keras.activations.softmax(bert_raw_result))

In [24]:
# Print the model's layers with their output shapes and parameter counts.
classifier_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
preprocessing (KerasLayer)      {'input_word_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
BERT_encoder (KerasLayer)       {'encoder_outputs':  41373185    preprocessing[0][0]              
                                                                 preprocessing[0][1]              
                                                                 preprocessing[0][2]              
____________________________________________________________________________________________

# Train Model

In [25]:
# Training configuration. The classifier's final layer has no activation,
# so the loss is told to expect logits.
epochs = 1
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
metrics = tf.metrics.CategoricalAccuracy()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)

classifier_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
# Fine-tune on the training set; `history` keeps the per-epoch metrics returned by fit().
history = classifier_model.fit(
    trainfeatures,
    trainlabels,
    epochs=epochs,
    batch_size=32,
)

# Evaluate the model

In [None]:
# Score the model on the held-out test set. evaluate() returns the loss followed
# by the compiled metric. Note that `loss` is rebound here from the loss object
# used at compile time to a plain number.
eval_results = classifier_model.evaluate(testfeatures, testlabels)
loss, accuracy = eval_results

print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

# Save the model

The model is exported to an `.h5` file.

In [28]:
# Write the trained classifier to 'model.h5' in the current working directory.
# NOTE(review): the model contains hub.KerasLayer layers; reloading this file
# presumably requires custom_objects={'KerasLayer': hub.KerasLayer} and
# tensorflow_text to be imported first — confirm against the serving code.
classifier_model.save('model.h5')