In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text  # kept even though `text` is not referenced by name in the visible cells
from sklearn.model_selection import train_test_split

# Working directory with backslashes replaced by forward slashes, so the paths
# built from it by string concatenation use a single separator style.
current_dir = os.getcwd()
current_dir_linux = current_dir.replace('\\', '/')

# Print the available GPU count.
# `tf.config.list_physical_devices` is the stable (non-experimental) name of
# the API that was previously reached through `tf.config.experimental`.
print("The number of avalible GPUs: ", len(tf.config.list_physical_devices('GPU')))

The number of avalible GPUs:  1


In [2]:
# Factory for a ModelCheckpoint callback that writes to a file named after the model.

def create_model_checkpoint(model_name, save_path="model_experiments"):
    """Return a Keras ``ModelCheckpoint`` callback for ``model_name``.

    Args:
        model_name: File name of the checkpoint; it is joined onto ``save_path``.
        save_path: Directory under which the checkpoint file is written.

    Returns:
        A ``tf.keras.callbacks.ModelCheckpoint`` configured with ``verbose=0``
        and ``save_best_only=True``.
    """
    checkpoint_path = os.path.join(save_path, model_name)
    return tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        verbose=0,            # keep the callback quiet
        save_best_only=True,  # keep only the best model seen so far
    )

# Preparing the data for our model

In [3]:
# Load the product reviews. The file is read with the ISO-8859-1 encoding;
# nothing is converted on disk.
#
# Only the two columns used downstream are parsed, instead of loading every
# column and then dropping ProductName, Rate, Review and ProductPrice.
# NOTE(review): the original full-file read emitted a pandas warning
# (presumably about mixed dtypes in one of the unused columns); restricting
# the parse should avoid it - confirm on a fresh run.
#
# The cleaning steps are chained so the cell never mutates a frame in place:
#   1. drop rows with a missing Summary or Sentiment,
#   2. lowercase the Sentiment labels.
df = (
    pd.read_csv(
        current_dir_linux + "/data/product_reviews.csv",
        encoding="ISO-8859-1",
        usecols=["Summary", "Sentiment"],
    )
    .dropna()
    .assign(Sentiment=lambda frame: frame["Sentiment"].str.lower())
)

df.head(10)

  df = pd.read_csv(current_dir_linux + "/data/product_reviews.csv", encoding="ISO-8859-1")


Unnamed: 0,Summary,Sentiment
0,great cooler excellent air flow and for this p...,positive
1,best budget 2 fit cooler nice cooling,positive
2,the quality is good but the power of air is de...,positive
3,very bad product its a only a fan,negative
4,ok ok product,neutral
5,the cooler is really fantastic and provides go...,positive
6,very good product,positive
7,very nice,positive
8,very bad cooler,negative
9,very good,positive


In [4]:
# One-hot encode the Sentiment column into columns named: negative, neutral, positive.
# The dtype is pinned so the label columns are 0/1 integers regardless of the
# installed pandas version (newer pandas releases produce boolean dummies by
# default, older ones produced uint8).
df = pd.get_dummies(df, columns=["Sentiment"], prefix="", prefix_sep="", dtype="uint8")
df[:10]

Unnamed: 0,Summary,negative,neutral,positive
0,great cooler excellent air flow and for this p...,0,0,1
1,best budget 2 fit cooler nice cooling,0,0,1
2,the quality is good but the power of air is de...,0,0,1
3,very bad product its a only a fan,1,0,0
4,ok ok product,0,1,0
5,the cooler is really fantastic and provides go...,0,0,1
6,very good product,0,0,1
7,very nice,0,0,1
8,very bad cooler,1,0,0
9,very good,0,0,1


In [5]:
# Class names, in the same column order that is used to build the label
# arrays, so an argmax index over the model output maps directly to a name.
# Selecting the columns by name (instead of slicing `df.columns[1:]`) does not
# depend on how many other columns the frame has, and raises a KeyError if a
# label column is missing.
class_names = df[["negative", "neutral", "positive"]].columns

In [6]:
# Hold out 20% of the reviews as the test split.

# Seed TensorFlow's global random generator. The split itself is made
# reproducible by `random_state` below.
tf.random.set_seed(42)

# Features are the raw summaries; labels are the one-hot sentiment columns.
summaries = df["Summary"].to_numpy()
sentiment_labels = df[["negative", "neutral", "positive"]].to_numpy()

train_sentences, test_sentences, train_labels, test_labels = train_test_split(
    summaries,
    sentiment_labels,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Make sure every split is a NumPy array.
# np.asarray hands back the input itself when it is already an ndarray, so no
# extra copy of each split is made (np.array copies by default).
train_sentences = np.asarray(train_sentences)
test_sentences = np.asarray(test_sentences)
train_labels = np.asarray(train_labels)
test_labels = np.asarray(test_labels)

In [8]:
# Sizes of the four splits: (train sentences, test sentences, train labels, test labels).
tuple(len(split) for split in (train_sentences, test_sentences, train_labels, test_labels))

(137103, 34276, 137103, 34276)

In [9]:
### Converting train-test data into tf.data datasets

def _as_batched_dataset(sentences, labels):
    """Pair sentences with labels, batch them by 32 and prefetch."""
    pairs = tf.data.Dataset.from_tensor_slices((sentences, labels))
    return pairs.batch(32).prefetch(tf.data.AUTOTUNE)


train_generetor = _as_batched_dataset(train_sentences, train_labels)
test_generetor = _as_batched_dataset(test_sentences, test_labels)

# Creating the model

In [15]:
# TF Hub handles for the text preprocessing model and the BERT encoder.
preprocess_handle = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
encoder_handle = "https://tfhub.dev/google/experts/bert/wiki_books/mnli/2"

bert_preprocess = hub.KerasLayer(preprocess_handle)
# trainable=True: the encoder is loaded as a trainable layer.
bert_embeddings = hub.KerasLayer(encoder_handle, trainable=True)

In [None]:
# Build, compile and train model_1: BERT preprocessing + encoder followed by a
# small Dense/Conv1D head that ends in a 3-way softmax.
# The seed is set before any layer of the head is created; keep the statement
# order of this cell unchanged if the run has to stay reproducible.
tf.random.set_seed(42)

# Scalar string input: each example is one raw sentence.
inputs = tf.keras.layers.Input(shape=[], dtype=tf.string, name="input_layer")
preprocessed_text = bert_preprocess(inputs)
outputs = bert_embeddings(preprocessed_text)

# The head is fed from the encoder's "pooled_output" entry.
x = tf.keras.layers.Dropout(0.1, name="dropout")(outputs["pooled_output"])
x = tf.keras.layers.Dense(128, activation="relu", name="dense_1")(x)
# Add a trailing axis so Conv1D can slide over the Dense units
# (presumably (batch, 128) -> (batch, 128, 1) - TODO confirm).
x = tf.expand_dims(x, axis=2)
x = tf.keras.layers.Conv1D(256, 5, activation="relu", name="conv_1d")(x)
# Pool the convolution output back to one vector per example.
x = tf.keras.layers.GlobalAveragePooling1D()(x)
# Same expand -> convolve -> pool pattern a second time.
x = tf.expand_dims(x, axis=2)
x = tf.keras.layers.Conv1D(256, 5, activation="relu", name="conv_1d_2")(x)
x = tf.keras.layers.GlobalAveragePooling1D()(x)
x = tf.keras.layers.Dense(128, activation="relu", name="dense_2")(x)
# Three softmax outputs, one per one-hot label column (negative, neutral, positive).
x = tf.keras.layers.Dense(3, activation="softmax", name="output_layer")(x)

model_1 = tf.keras.Model(inputs, x, name="model_1_bert_embeddings")

# The labels are one-hot rows, hence categorical (not sparse) cross-entropy.
# NOTE(review): Adam() runs at the Keras default learning rate while the BERT
# encoder is trainable; BERT fine-tuning recipes usually use a much smaller
# rate (roughly 2e-5 to 5e-5) - confirm the default is intended.
model_1.compile(loss="categorical_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

# NOTE(review): validation uses 15% of the test batches each epoch, and the
# checkpoint callback is created with save_best_only=True, so the saved model
# is selected using test data - consider a separate validation split.
model_1.fit(
    train_generetor,
    steps_per_epoch=len(train_generetor),
    epochs=5,
    validation_data=test_generetor,
    validation_steps=int(0.15 * len(test_generetor)),
    callbacks=[create_model_checkpoint(model_name="model_1_bert_embeddings_01.h5")]
)

In [74]:
# Reload the checkpoint written during training. hub.KerasLayer is passed as a
# custom object under the name "KerasLayer" so the saved model can be rebuilt.
saved_model_path = "model_experiments/model_1_bert_embeddings_01.h5"
model_1 = tf.keras.models.load_model(
    saved_model_path,
    custom_objects={"KerasLayer": hub.KerasLayer},
)





In [None]:
# Evaluate the reloaded model on the full batched test dataset.
model_1.evaluate(x=test_generetor)

In [127]:
# Read one sentence from the user (lowercased) and classify it.
test_sentence = input("Enter a sentence: ").lower()

# Wrap the single sentence in a batch of one before predicting.
single_sentence_batch = tf.expand_dims(test_sentence, axis=0)
model_1_pred_probs = model_1.predict(single_sentence_batch)

# Most probable class and its probability expressed as a percentage.
top_class_index = np.argmax(model_1_pred_probs)
prediced_class = class_names[top_class_index]
class_prob = np.max(model_1_pred_probs) * 100

print(f"Test sentence: {test_sentence}")
print(f"Predicted class: {prediced_class} with probability of %{round(class_prob)}")

Test sentence: lol 
Predicted class: neutral with probability of %57
