### **NLP using TensorFlow**

In [1]:
# Standard library needs
import os

# DL needs
import tensorflow as tf
import keras as kr

# Data needs
import pandas as pd
from sklearn.model_selection import train_test_split

# Numerical computation needs
import numpy as np

# plotting needs
import matplotlib.pyplot as plt
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# ensuring reproducibility
random_seed=42
# set_random_seed seeds Python's `random`, NumPy and TensorFlow in one call.
# tf.random.set_seed alone leaves the Python / NumPy generators unseeded, which
# is not enough to make Keras weight initialisation repeatable across runs.
tf.keras.utils.set_random_seed(random_seed)
import sys

# Directory that holds the shared `important_functionalities` helper module.
# On another machine, point the TF_HELPERS_DIR environment variable at the
# right folder; the original absolute path stays as the default so the
# notebook keeps working unchanged on the author's setup.
HELPERS_DIR = os.environ.get(
    'TF_HELPERS_DIR',
    '/home/rudraksha14/Desktop/RAY_RISE_ABOVE_YOURSELF/Programming/tensorflow',
)
if HELPERS_DIR not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(HELPERS_DIR)
import important_functionalities as impf

2025-03-24 17:58:12.186156: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Validation metrics of the earlier models, hard-coded here so model 6 can be
# compared against them (presumably copied from the previous notebooks in this
# series -- confirm if the numbers are regenerated).
# Note the scales: 'accuracy' is a percentage (0-100) whereas the other three
# metrics are fractions (0-1).
baseline_results = dict(
    accuracy=79.26509186351706,
    precision=0.8111390004213173,
    recall=0.7926509186351706,
    f1_score=0.7862189758049549,
)

model_1_results = dict(
    accuracy=80.4461942257218,
    precision=0.8065100939758145,
    recall=0.8044619422572179,
    f1_score=0.8028505735911119,
)

model_2_results = dict(
    accuracy=74.80314960629921,
    precision=0.7475572440372034,
    recall=0.7480314960629921,
    f1_score=0.7475400591051667,
)

model_3_results = dict(
    accuracy=75.45931758530183,
    precision=0.7546642240189775,
    recall=0.7545931758530183,
    f1_score=0.7531886844350836,
)

model_4_results = dict(
    accuracy=74.93438320209974,
    precision=0.7500996927165142,
    recall=0.7493438320209974,
    f1_score=0.747278252053036,
)

model_5_results = dict(
    accuracy=77.16535433070865,
    precision=0.7722289521502119,
    recall=0.7716535433070866,
    f1_score=0.7701831305177762,
)

In [3]:
# Read the train / test CSV files from the working directory (train first,
# then test) and preview the first rows of the training frame.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))
train_df.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [4]:
# shuffle training dataframe
# frac=1 samples 100% of the rows, i.e. every row is returned in a random order;
# random_state makes the shuffle repeatable.
train_df_shuffled = train_df.sample(
    frac=1,
    random_state=random_seed,
)
train_df_shuffled.head()

Unnamed: 0,id,keyword,location,text,target
2644,3796,destruction,,So you have a new weapon that can cause un-ima...,1
2227,3185,deluge,,The f$&amp;@ing things I do for #GISHWHES Just...,0
5448,7769,police,UK,DT @georgegalloway: RT @Galloway4Mayor: ÛÏThe...,1
132,191,aftershock,,Aftershock back to school kick off was great. ...,0
6845,9810,trauma,"Montgomery County, MD",in response to trauma Children of Addicts deve...,0


In [5]:
# Hold out 10% of the shuffled rows as a validation split (text -> features,
# target -> labels); random_state keeps the split repeatable.
all_sentences = train_df_shuffled['text'].to_numpy()
all_labels = train_df_shuffled['target'].to_numpy()
train_sentences, val_sentences, train_labels, val_labels = train_test_split(
    all_sentences,
    all_labels,
    test_size=0.1,
    random_state=random_seed,
)

**16. Model 6: TensorFlow Hub pre-trained feature extractor [USE (Universal Sentence Encoder)]**

* USE (Universal Sentence Encoder) feature extractor.
* Converting a sequence of text into a numerical representation is called *encoding*, and the model block that performs it is called an *encoder*.
* The model block that decodes this numerical representation (the encoding) into some other format / the desired output is called a *decoder*.
* The encoder-decoder architecture is very common in NLP models.

In [6]:
import tensorflow_hub as hub

# Load the Universal Sentence Encoder from its hub handle; the returned object
# is called directly on a list of sentences in the next cell.
use_handle = "https://www.kaggle.com/models/google/universal-sentence-encoder/TensorFlow2/universal-sentence-encoder/2"
embed = hub.load(use_handle)

In [7]:
# Sanity check: embed two sample sentences and print the shape of the first
# sentence's embedding vector.
sample_sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "I am a sentence for which I would like to get its embedding",
]
embeddings = embed(sample_sentences)
first_embedding = embeddings[0]
print(first_embedding.shape)

(512,)


In [8]:
# Create a keras layer using USE pretrained layer from tensorflow hub
# (scalar string input, weights frozen).
use_layer_options = dict(
    input_shape=[],
    dtype=tf.string,
    trainable=False,  # default=False
    name='USE',
)
sentence_encoder_layer = hub.KerasLayer(
    "https://www.kaggle.com/models/google/universal-sentence-encoder/TensorFlow2/universal-sentence-encoder/2",
    **use_layer_options,
)

# Alternative handle (USE v4 hosted on tfhub.dev), kept for reference:
# sentence_encoder_layer = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4",
#                                         input_shape = [],
#                                         dtype=tf.string,
#                                         trainable=False, # default=False,
#                                         name='USE'
#                                         )

In [None]:
@kr.saving.register_keras_serializable(package="my_custom_package")
class UniversalEncodedLayer(tf.keras.layers.Layer):
    """Keras layer that wraps a TF-Hub Universal Sentence Encoder (USE).

    The wrapped ``hub.KerasLayer`` takes scalar strings and is created with
    ``trainable=False``, so it acts as a frozen feature extractor. The class is
    registered as Keras-serializable so that models containing it can be saved
    to and reloaded from a ``.keras`` file.

    Args:
        model_url: Hub handle of the encoder to load. ``None`` (the default)
            selects ``DEFAULT_MODEL_URL``, i.e. the handle this layer was
            originally hard-coded to.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    # Handle used when the caller does not pass `model_url`.
    DEFAULT_MODEL_URL = "https://www.kaggle.com/models/google/universal-sentence-encoder/TensorFlow2/universal-sentence-encoder/2"

    def __init__(self, model_url=None, **kwargs):
        super().__init__(**kwargs)
        # Remember the handle so get_config() can round-trip it through save/load.
        self.model_url = model_url or self.DEFAULT_MODEL_URL
        self.use_layer = hub.KerasLayer(self.model_url,
                                        input_shape = [],
                                        dtype=tf.string,
                                        trainable=False, # default=False,
                                        name='USE'
                                        )

    def call(self,inputs):
        """Return the encoder's embeddings for a batch of input strings."""
        return self.use_layer(inputs)

    def get_config(self):
        """Return the layer config, including the hub handle.

        Configs saved before `model_url` existed lack the key and therefore
        fall back to ``DEFAULT_MODEL_URL`` when the layer is re-created.
        """
        config = super().get_config()
        config.update({"model_url": self.model_url})
        return config
    

In [10]:
# Assemble model_6 with the functional API:
# raw string -> USE embedding -> Dense(64, relu) -> single sigmoid unit.
inputs = tf.keras.layers.Input(shape=[], dtype=tf.string)

# Instantiate the layers in the same order as they are wired below.
use_encoder = UniversalEncodedLayer()
hidden_dense = tf.keras.layers.Dense(units=64, activation='relu')
sigmoid_head = tf.keras.layers.Dense(units=1, activation='sigmoid')

embedding_vector = use_encoder(inputs)
x = hidden_dense(embedding_vector)
outputs = sigmoid_head(x)

model_6 = tf.keras.models.Model(inputs=inputs, outputs=outputs, name='model_6')


In [11]:
# compile the model: binary cross-entropy loss, accuracy metric, Adam optimizer
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model_6.compile(
    optimizer=adam_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Print the summary of model_6 to inspect its layers before training.
model_6.summary()

In [13]:
# create a tensorboard callback (need to create a new one for each model)
SAVE_DIR = 'model_logs'
tensorboard_callback = impf.create_tensorboard_callback(
    dir_name=SAVE_DIR,
    experiment_name='model_6_transfer_learning',
)

# fit the model for 5 epochs, validating on the held-out split after each epoch
history_model_6 = model_6.fit(
    x=train_sentences,
    y=train_labels,
    epochs=5,
    validation_data=(val_sentences, val_labels),
    callbacks=[tensorboard_callback],
)

Saving TensorBoard log files to : model_logs/model_6_transfer_learning/20250324-175912
Epoch 1/5
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.7546 - loss: 0.5793 - val_accuracy: 0.7992 - val_loss: 0.4472
Epoch 2/5
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8040 - loss: 0.4220 - val_accuracy: 0.8058 - val_loss: 0.4369
Epoch 3/5
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.8135 - loss: 0.4069 - val_accuracy: 0.8123 - val_loss: 0.4317
Epoch 4/5
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8164 - loss: 0.3985 - val_accuracy: 0.8123 - val_loss: 0.4285
Epoch 5/5
[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8200 - loss: 0.3921 - val_accuracy: 0.8110 - val_loss: 0.4267


In [14]:
# making predictions:
model_6_preds=model_6.predict(val_sentences)
thresh=0.5
# Vectorised thresholding: flatten the predictions to 1-D, mark everything
# strictly above `thresh` as class 1 and the rest as class 0. This replaces the
# per-row Python lambda (which relied on the truthiness of 1-element arrays)
# and still yields a plain list of Python ints.
model_6_preds_threshed=(np.asarray(model_6_preds).reshape(-1) > thresh).astype(int).tolist()

# calculating results:
model_6_results=impf.calculate_results(val_labels,model_6_preds_threshed)
model_6_results

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step


{'accuracy': 81.10236220472441,
 'precision': 0.8113631917551808,
 'recall': 0.8110236220472441,
 'f1_score': 0.8102336058004984}

In [15]:
# comparing model 6 with baseline
# (element-wise ">" over the metric values, taken in each dict's insertion order)
np.greater(list(model_6_results.values()), list(baseline_results.values()))

array([ True,  True,  True,  True])

In [16]:
# comparing model 6 with model 1
# (element-wise ">" over the metric values, taken in each dict's insertion order)
np.greater(list(model_6_results.values()), list(model_1_results.values()))

array([ True,  True,  True,  True])

In [17]:
# comparing model 6 with model 2
# (element-wise ">" over the metric values, taken in each dict's insertion order)
np.greater(list(model_6_results.values()), list(model_2_results.values()))

array([ True,  True,  True,  True])

In [18]:
# comparing model 6 with model 3
# (element-wise ">" over the metric values, taken in each dict's insertion order)
np.greater(list(model_6_results.values()), list(model_3_results.values()))

array([ True,  True,  True,  True])

In [19]:
# comparing model 6 with model 4
# (element-wise ">" over the metric values, taken in each dict's insertion order)
np.greater(list(model_6_results.values()), list(model_4_results.values()))

array([ True,  True,  True,  True])

In [20]:
# comparing model 6 with model 5
# (element-wise ">" over the metric values, taken in each dict's insertion order)
np.greater(list(model_6_results.values()), list(model_5_results.values()))

array([ True,  True,  True,  True])

In [21]:
### SAVING OUR BEST MODEL
# Make sure the target directory exists first, so the save also succeeds on a
# fresh checkout where `models/` has not been created yet.
os.makedirs('models', exist_ok=True)
model_6.save('models/best_model.keras')

In [22]:
# Evaluate the in-memory model on the validation split. With the compile
# settings used for model_6 (one loss, metrics=['accuracy']) the returned list
# is [loss, accuracy]; it serves as the reference for the reloaded model.
model_6.evaluate(val_sentences,val_labels)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7843 - loss: 0.4778


[0.4266805648803711, 0.8110235929489136]

In [23]:
### LOAD and EVALUATE
# Re-create the model from the .keras archive written by the save cell.
saved_model_path = 'models/best_model.keras'
model = tf.keras.models.load_model(saved_model_path)

In [24]:
# Evaluate the reloaded model on the same validation split; the result should
# match the evaluation of the in-memory model_6 if the save/load round trip
# preserved the weights.
model.evaluate(val_sentences,val_labels)

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7843 - loss: 0.4778


[0.4266805648803711, 0.8110235929489136]

**Conclusion:**
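* Model 6 (frozen USE embeddings + a small dense head) outperforms the baseline and models 1–5 on all four validation metrics (accuracy, precision, recall and F1-score).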

***-- CONTD IN NEXT NOTEBOOK --***