**Import Libraries**

In [108]:
# data manipulation
import pandas as pd

# feature engineering
from nltk.tokenize import word_tokenize
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# model definition & training
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization

# save model
import pickle

# suppress every warning for the rest of the session
# (note: this also hides deprecation notices from the libraries imported above)
import warnings
warnings.filterwarnings('ignore')

# remove the cap on how many DataFrame columns pandas shows when displaying a frame
pd.set_option('display.max_columns', None)

# **9. Model Inference**

- In this step we test the exported model on data that it has never seen before.

## **9.1. Load Model and Other Files**

In [109]:
# Load the pickled vectorizer state (a dict with 'config' and 'weights' entries).
# The context manager closes the file handle as soon as the pickle is read.
# NOTE: pickle can execute arbitrary code while loading; only load a
# 'vectorizer.pkl' that comes from a trusted source.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    vectorization_data = pickle.load(vectorizer_file)

# rebuild the TextVectorization layer from its saved config, then restore its saved weights
vectorizer = TextVectorization.from_config(vectorization_data['config'])
vectorizer.set_weights(vectorization_data['weights'])

# load the exported Keras model
model = tf.keras.models.load_model('model.h5')

## **9.2. Create Dummy Data**

- The dummy data contains a review that the model has never seen before.

In [110]:
# one-row frame holding the review text used as the inference input
df_dummy = pd.DataFrame(
    data=[['this is a very boring game!']],
    columns=['user_review'],
)

display(df_dummy)

Unnamed: 0,user_review
0,this is a very boring game!


## **9.3. Preprocess Data**

In [111]:
# lemmatizer instance shared by the preprocessing function
lemmatizer = WordNetLemmatizer()

# English stopwords from NLTK, stored as a set for membership tests
nltk_stopword = set(stopwords.words('english'))

In [112]:
# create a function for text preprocessing

def text_preprocessing(text):
  """Clean one raw review string and return it as space-joined tokens.

  Steps, in order: lowercase; drop @mentions and #hashtags; drop literal
  backslash-n sequences; trim surrounding whitespace; drop URLs; replace
  every character that is not a letter, whitespace or apostrophe with a
  space; delete apostrophes; tokenize; drop stopwords; lemmatize.

  Relies on the module-level names `nltk_stopword` (stopword collection)
  and `lemmatizer` (object exposing `lemmatize(word)`).

  Parameters
  ----------
  text : str
      Raw review text.

  Returns
  -------
  str
      The cleaned tokens joined by single spaces ('' if nothing remains).

  NOTE(review): the URL patterns previously used `\\s+` (whitespace) and so
  never matched a URL; they now use `\\S+`. Confirm the preprocessing used
  at training time applies the same patterns.
  """
  # case folding (every later pattern can therefore assume lowercase input)
  text = text.lower()

  # mention removal
  text = re.sub(r"@[a-z0-9_]+", " ", text)

  # hashtags removal
  text = re.sub(r"#[a-z0-9_]+", " ", text)

  # removal of the literal two-character sequence backslash + 'n';
  # real newline characters are whitespace and are handled by tokenization
  text = re.sub(r"\\n", " ", text)

  # whitespace removal
  text = text.strip()

  # url removal: 'http...' / 'www....' up to the next whitespace character
  text = re.sub(r"http\S+", " ", text)
  text = re.sub(r"www\.\S+", " ", text)

  # non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc.);
  # apostrophes are kept in the first pass so contractions collapse
  # ("don't" -> "dont") instead of splitting into two tokens
  text = re.sub(r"[^a-z\s']", " ", text)
  text = text.replace("'", "")

  # tokenization
  tokens = word_tokenize(text)

  # stopwords removal, then lemmatizing of the surviving tokens
  tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in nltk_stopword]

  # combining tokens
  return ' '.join(tokens)

In [113]:
# run the cleaning function over every review in the dummy frame
data_preprocessed = df_dummy['user_review'].apply(text_preprocessing)

data_preprocessed

0    boring game
Name: user_review, dtype: object

## **9.4. Text Vectorization**

In [114]:
# convert the cleaned text into the numeric representation defined by the restored TextVectorization layer
data_vect = vectorizer(data_preprocessed)

## **9.5. Predict User Suggestion**

In [115]:
# predict user_suggestion
predicted_user_suggestion_proba = model.predict(data_vect)

# decision threshold applied to the model output
# NOTE(review): 0.75 is presumably the cut-off selected during model evaluation — confirm
threshold = 0.75

# the dummy frame holds a single review, so only the first prediction is reported
first_proba = predicted_user_suggestion_proba[0]
predict_result = 'Recommended' if first_proba > threshold else 'Not_Recommended'

# show result (labels name the user-suggestion target this notebook predicts)
print(f"predicted user suggestion proba: {first_proba}")
print(f"predicted user suggestion: {predict_result}")

predicted booking status proba: [0.3751136]
predicted booking status: Not_Recommended
