### **NLP using Tensorflow**

In [55]:
# DL needs
import tensorflow as tf
import keras as kr

# Data needs
import pandas as pd
from sklearn.model_selection import train_test_split

# Numerical computation needs
import numpy as np

# plotting needs
import matplotlib.pyplot as plt
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# ensuring reproducibility
random_seed=42
tf.random.set_seed(random_seed)
import sys

sys.path.append('/home/rudraksha14/Desktop/RAY_RISE_ABOVE_YOURSELF/Programming/tensorflow')
import important_functionalities as impf

In [56]:
baseline_results = {'accuracy': 79.26509186351706,
 'precision': 0.8111390004213173,
 'recall': 0.7926509186351706,
 'f1_score': 0.7862189758049549}

model_1_results = {'accuracy': 80.4461942257218,
 'precision': 0.8065100939758145,
 'recall': 0.8044619422572179,
 'f1_score': 0.8028505735911119}

model_2_results = {'accuracy': 74.80314960629921,
 'precision': 0.7475572440372034,
 'recall': 0.7480314960629921,
 'f1_score': 0.7475400591051667}

model_3_results = {'accuracy': 75.45931758530183,
 'precision': 0.7546642240189775,
 'recall': 0.7545931758530183,
 'f1_score': 0.7531886844350836}

model_4_results = {'accuracy': 74.93438320209974,
 'precision': 0.7500996927165142,
 'recall': 0.7493438320209974,
 'f1_score': 0.747278252053036}

model_5_results = {'accuracy': 77.16535433070865,
 'precision': 0.7722289521502119,
 'recall': 0.7716535433070866,
 'f1_score': 0.7701831305177762}

model_6_results = {'accuracy': 81.88976377952756,
 'precision': 0.8190585128848538,
 'recall': 0.8188976377952756,
 'f1_score': 0.8182856388893088}

In [57]:
train_df=pd.read_csv('train.csv')
test_df=pd.read_csv('test.csv')
train_df.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [58]:
# shuffle training dataframe
train_df_shuffled=train_df.sample(frac=1,random_state=random_seed) # frac: percentage of  data to be shuffled
train_df_shuffled.head()

Unnamed: 0,id,keyword,location,text,target
2644,3796,destruction,,So you have a new weapon that can cause un-ima...,1
2227,3185,deluge,,The f$&amp;@ing things I do for #GISHWHES Just...,0
5448,7769,police,UK,DT @georgegalloway: RT @Galloway4Mayor: ÛÏThe...,1
132,191,aftershock,,Aftershock back to school kick off was great. ...,0
6845,9810,trauma,"Montgomery County, MD",in response to trauma Children of Addicts deve...,0


**17.  Model 7: Tensorflow hub pre-trained feature extractor (10% of data)**

* While making 10% data from original, take care not to have data leakage, i.e., take the 10 percent from the already splitted train_data

In [59]:
train_sentences,val_sentences,train_labels,val_labels=train_test_split(train_df_shuffled['text'].to_numpy(),train_df_shuffled['target'].to_numpy(),test_size=0.1,random_state=random_seed)

In [60]:
percent=10
train_sentences_10_percent = train_sentences[:int((percent/100)*len(train_df_shuffled))]
train_labels_10_percent = train_labels[:int((percent/100)*len(train_df_shuffled))]

len(train_sentences_10_percent),len(train_labels_10_percent)


(761, 761)

In [61]:
import tensorflow_hub as hub
# Create a keras layer using USE pretrained layer from tensorflow hub
sentence_encoder_layer = hub.KerasLayer("https://www.kaggle.com/models/google/universal-sentence-encoder/TensorFlow2/universal-sentence-encoder/2",
                                        input_shape = [],
                                        dtype=tf.string,
                                        trainable=False, # default=False,
                                        name='USE'
                                        )

# sentence_encoder_layer = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4",
#                                         input_shape = [],
#                                         dtype=tf.string,
#                                         trainable=False, # default=False,
#                                         name='USE'
#                                         )

In [62]:
@kr.saving.register_keras_serializable(package="UniversalEncodedLayer")
class UniversalEncodedLayer(tf.keras.layers.Layer):   
    def call(self,inputs):
        return sentence_encoder_layer(inputs)

In [63]:
inputs = tf.keras.layers.Input(shape=[],dtype=tf.string)
embedding_vector = UniversalEncodedLayer()(inputs)
x=tf.keras.layers.Dense(units=64,activation='relu')(embedding_vector)
outputs = tf.keras.layers.Dense(units = 1, activation = 'sigmoid')(x)
model_7=tf.keras.models.Model(inputs,outputs,name='model_7')

In [64]:
# compile the model
model_7.compile(loss='binary_crossentropy', 
                metrics=['accuracy'], 
                optimizer=tf.keras.optimizers.Adam())

In [65]:
model_7.summary()

In [66]:
# create a tensorboard callback (need to create a new one for each model)
SAVE_DIR = 'model_logs'

early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5,
                                                     monitor = 'val_loss'
)

# fit the model
history_model_7 = model_7.fit(train_sentences_10_percent,train_labels_10_percent,
                              epochs=5,
                              validation_data=(val_sentences,val_labels),
                              callbacks=[impf.create_tensorboard_callback(dir_name=SAVE_DIR,experiment_name='model_7_10_percent'),early_stopping_cb]
                              )

Saving TensorBoard log files to : model_logs/model_7_10_percent/20250324-174346
Epoch 1/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 46ms/step - accuracy: 0.6316 - loss: 0.6767 - val_accuracy: 0.7467 - val_loss: 0.6399
Epoch 2/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8311 - loss: 0.5973 - val_accuracy: 0.7808 - val_loss: 0.5754
Epoch 3/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8139 - loss: 0.5146 - val_accuracy: 0.7835 - val_loss: 0.5234
Epoch 4/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8235 - loss: 0.4488 - val_accuracy: 0.7782 - val_loss: 0.4966
Epoch 5/5
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8334 - loss: 0.4058 - val_accuracy: 0.7795 - val_loss: 0.4862


In [67]:
# making predictions:
model_7_preds=model_7.predict(val_sentences)
thresh=0.5
model_7_preds_threshed=list(map(lambda x: 1 if x>thresh else 0,model_7_preds))

# calculating results:
model_7_results=impf.calculate_results(val_labels,model_7_preds_threshed)
model_7_results

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step


{'accuracy': 77.95275590551181,
 'precision': 0.7796366729611287,
 'recall': 0.7795275590551181,
 'f1_score': 0.7785127368340806}

In [68]:
# comparing model 7 with baseline
np.array(list(model_7_results.values()))>np.array(list(baseline_results.values()))

array([False, False, False, False])

In [69]:
# comparing model 7 with model 1
np.array(list(model_7_results.values()))>np.array(list(model_1_results.values()))

array([False, False, False, False])

In [70]:
# comparing model 7 with model 2
np.array(list(model_7_results.values()))>np.array(list(model_2_results.values()))

array([ True,  True,  True,  True])

In [71]:
# comparing model 7 with model 3
np.array(list(model_7_results.values()))>np.array(list(model_3_results.values()))

array([ True,  True,  True,  True])

In [72]:
# comparing model 7 with model 4
np.array(list(model_7_results.values()))>np.array(list(model_4_results.values()))

array([ True,  True,  True,  True])

In [73]:
# comparing model 7 with model 5
np.array(list(model_7_results.values()))>np.array(list(model_5_results.values()))

array([ True,  True,  True,  True])

In [74]:
# comparing model 7 with model 6
np.array(list(model_7_results.values()))>np.array(list(model_6_results.values()))

array([False, False, False, False])

**Conclusion:**
* Outperforms all previous models

***-- CONTD IN NEXT NOTEBOOK --***