In [None]:
# Mount Google Drive (Colab only) so the dataset and saved models under
# /content/drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install gradio

In [None]:
import gradio as gr

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dot, Dense, Add, Concatenate
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dropout
import numpy as np

## Load Data

In [None]:
import json

def load_data_from_json(filepath):
  """Load and parse a JSON file.

  Args:
    filepath: The path to the JSON file.

  Returns:
    The parsed JSON content (whatever top-level value the file holds,
    e.g. a dictionary), or None if the file does not exist or does not
    contain valid JSON. An error message is printed in both failure cases.
  """
  try:
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding when decoding free-text fields.
    with open(filepath, 'r', encoding='utf-8') as file:
      return json.load(file)
  except FileNotFoundError:
    print(f"Error: File not found at {filepath}")
  except json.JSONDecodeError:
    print(f"Error: Invalid JSON format in {filepath}")
  return None

In [None]:
data = load_data_from_json('/content/drive/MyDrive/CMPE256/CMPE256_Project/dataset/filter_all_t.json')

# Fail fast: every later cell needs `df`, so stop here with a clear message
# instead of printing an error and hitting a NameError further down.
if not data:
    raise ValueError("Error: Data not loaded from JSON file.")

# The review records are expected under the 'train' key as a list.
train_records = data.get('train') if isinstance(data, dict) else None
if not isinstance(train_records, list):
    raise ValueError("Error: 'train' key not found or not a list in the JSON data.")

df = pd.DataFrame(train_records)  # Create DataFrame from 'train' list
print(df.head())

                business_id                user_id  rating  \
0  60567465d335d0abfb415b26  101074926318992653684       4   
1  6050fa9f5b4ccec8d5cae994  117065749986299237881       5   
2  604be10877e81aaed3cc9a1e  106700937793048450809       4   
3  60411e017cd8bf130362365a  101643045857250355161       5   
4  604139dd7cd8bf1303624208  109802745326785766951       4   

                                         review_text  \
0  The tang of the tomato sauce is outstanding. A...   
1              Chicken and waffles were really good!   
2  The appetizer of colossal shrimp was very good...   
3  The fish tacos here  omg! The salad was great ...   
4  Ribs are great, as are the mac and cheese, fri...   

                                                pics  \
0  [AF1QipM-2IRmvitARbcJr7deWfe5hyVBg_ArPMQSYvq0,...   
1     [AF1QipMpfxIZUT_aymQ3qPGO-QgGYzxbtLZGmHufAp2s]   
2  [AF1QipMNnqM5X9sSyZ9pXRZ1jvrURHN9bZhGdzuEXoP8,...   
3  [AF1QipM-a6AGGp4Hgk5RD0gY5sDRp5kEfB1hZLvlRkft,...   
4     [AF1

In [None]:
# List the column labels of the loaded reviews frame
df.columns

Index(['business_id', 'user_id', 'rating', 'review_text', 'pics',
       'history_reviews'],
      dtype='object')

In [None]:
# Largest rating value present in the data; reused later as the upper bound
# when normalizing ratings.
max_rating = df['rating'].max()
print(f"The maximum rating in the dataset is: {max_rating}")

The maximum rating in the dataset is: 5


In [None]:
# Smallest rating value present in the data; reused later as the lower bound
# when normalizing ratings.
min_rating = df['rating'].min()
print(f"The minimum rating in the dataset is: {min_rating}")

The minimum rating in the dataset is: 1


In [None]:
# Row count of the user_id column vs. the number of distinct users in it
user_id_column = df['user_id']
total_user_ids = len(user_id_column)
unique_user_ids = user_id_column.nunique()
print(f"Total number of User IDs: {total_user_ids}")
print(f"Number of unique User IDs: {unique_user_ids}")

Total number of User IDs: 87013
Number of unique User IDs: 29596


In [None]:
# Row count of the business_id column vs. the number of distinct businesses in it
business_id_column = df['business_id']
total_business_ids = len(business_id_column)
unique_business_ids = business_id_column.nunique()
print(f"Total number of Business IDs: {total_business_ids}")
print(f"Number of unique Business IDs: {unique_business_ids}")

Total number of Business IDs: 87013
Number of unique Business IDs: 27896


## Matrix Factorization Collaborative Filtering

In [None]:
'''
Encoding user_id and business_id in df
'''

# Separate encoders so user IDs and business IDs each get their own
# contiguous integer index space (suitable as Embedding indices).
user_encoder = LabelEncoder()
business_encoder = LabelEncoder()

# NOTE: the encoders are fitted on the full DataFrame here (the train/test
# split only happens in a later cell), so every ID in df receives an index.
df['user_id_encoded'] = user_encoder.fit_transform(df['user_id'])
df['business_id_encoded'] = business_encoder.fit_transform(df['business_id'])

In [None]:
# Min-max normalize ratings to [0, 1] using the min/max rating observed in df
# (computed in the earlier cells).
df['rating_normalized'] = (df['rating'] -min_rating)/(max_rating - min_rating)

In [None]:
# Preview the frame with the encoded ID and normalized rating columns added
df.head()

Unnamed: 0,business_id,user_id,rating,review_text,pics,history_reviews,user_id_encoded,business_id_encoded,rating_normalized
0,60567465d335d0abfb415b26,101074926318992653684,4,The tang of the tomato sauce is outstanding. A...,"[AF1QipM-2IRmvitARbcJr7deWfe5hyVBg_ArPMQSYvq0,...",[[101074926318992653684_6056272797d555cc6fb0d1...,1854,26649,0.75
1,6050fa9f5b4ccec8d5cae994,117065749986299237881,5,Chicken and waffles were really good!,[AF1QipMpfxIZUT_aymQ3qPGO-QgGYzxbtLZGmHufAp2s],[[117065749986299237881_605206f8d8c08f462b93e8...,27375,19273,1.0
2,604be10877e81aaed3cc9a1e,106700937793048450809,4,The appetizer of colossal shrimp was very good...,"[AF1QipMNnqM5X9sSyZ9pXRZ1jvrURHN9bZhGdzuEXoP8,...",[[106700937793048450809_6044300b27f39b7b5d1dbf...,10822,13238,0.75
3,60411e017cd8bf130362365a,101643045857250355161,5,The fish tacos here omg! The salad was great ...,"[AF1QipM-a6AGGp4Hgk5RD0gY5sDRp5kEfB1hZLvlRkft,...",[[101643045857250355161_604fbdd099686c10168c91...,2779,569,1.0
4,604139dd7cd8bf1303624208,109802745326785766951,4,"Ribs are great, as are the mac and cheese, fri...",[AF1QipNVys4yq-5w_3EsDdHpSc9ZNb7Nl30Mfb6Y0Gup],[[109802745326785766951_60524fa9f09a4ffff042f9...,15813,667,0.75


In [None]:
# Width of each user / business embedding vector
embedding_dim = 64

# Vocabulary sizes for the embedding layers (recomputed here so this cell
# does not depend on the exploratory cells above)
unique_user_ids = df['user_id'].nunique()
unique_business_ids = df['business_id'].nunique()

In [None]:
'''

User Tower

'''
# One encoded user ID per sample
user_input_placeholder = Input(shape=(1,), name='user_input')

# Convert the user ID into a dense embedding vector of size embedding_dim.
# input_dim is the number of distinct users plus one spare row; a light L2
# penalty (1e-6) is applied to the embedding weights.
user_embedding = Embedding(input_dim=unique_user_ids + 1,
                           output_dim=embedding_dim,
                           name='user_embedding',
                           embeddings_regularizer=l2(1e-6))(user_input_placeholder)

# Remove the extra length-1 dimension so the embedding becomes a simple 1D vector
user_embedding = Flatten()(user_embedding)

In [None]:
'''

Business Tower

'''
# One encoded business ID per sample
business_input_placeholder = Input(shape=(1,), name='business_input')

# Convert the business ID into a dense embedding vector of size embedding_dim.
# input_dim is the number of distinct businesses plus one spare row; a light
# L2 penalty (1e-6) is applied to the embedding weights.
business_embedding = Embedding(input_dim=unique_business_ids + 1,
                           output_dim=embedding_dim,
                           name='business_embedding',
                           embeddings_regularizer=l2(1e-6))(business_input_placeholder)

# Remove the extra length-1 dimension so the embedding becomes a simple 1D vector
business_embedding = Flatten()(business_embedding)

In [None]:
'''

Build and compile the model with the 2 Tower architecture
(no training is run in this cell)

'''

# Per-user and per-business scalar bias terms, looked up from the same inputs.
# NOTE(review): these use input_dim without the "+ 1" used by the main
# embedding layers; confirm the two sizes are meant to differ.
user_bias = Embedding(unique_user_ids, 1)(user_input_placeholder)
business_bias = Embedding(unique_business_ids, 1)(business_input_placeholder)

user_bias = Flatten()(user_bias)
business_bias = Flatten()(business_bias)

# Interaction score: dot product of the two embeddings plus both bias terms
# (a single value per sample)
dot_product = Dot(axes=1)([user_embedding, business_embedding])
interaction = Add()([dot_product, user_bias, business_bias])


# MLP head applied to the interaction score; the final linear unit outputs
# the predicted rating
x = Dense(128, activation='relu')(interaction)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
output = Dense(1, activation='linear')(x)

# Model: two inputs (user ID, business ID) -> one predicted rating
model = Model(inputs=[user_input_placeholder,
                      business_input_placeholder], outputs=output)

# Compile the model with Adam and mean-squared-error loss
model.compile(optimizer='adam', loss='mse')

In [None]:
# Print the layer-by-layer structure and parameter counts of the model built above
model.summary()

In [None]:
# Hold out 20% of the reviews; the fixed seed keeps the split reproducible.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Encoded IDs (model inputs) and normalized ratings (targets) as arrays
train_user_ids, train_business_ids, train_normalized_ratings = (
    train[column].values
    for column in ('user_id_encoded', 'business_id_encoded', 'rating_normalized')
)

test_user_ids, test_business_ids, test_normalized_ratings = (
    test[column].values
    for column in ('user_id_encoded', 'business_id_encoded', 'rating_normalized')
)

# Un-normalized ratings of the held-out rows
test_ratings = test['rating'].values

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

In [None]:
# NOTE(review): these callbacks are only constructed here; no model.fit(...)
# call that would use them appears in the visible notebook.

# Early stopping to prevent overfitting
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,              # Stop after 3 bad epochs
    restore_best_weights=True,
    verbose=1
)

# Reduce learning rate when validation loss plateaus
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,              # Reduce by half
    patience=2,              # Wait 2 epochs
    min_lr=1e-6,             # Don't go below this
    verbose=1
)

# Save the full model (not just weights) whenever val_loss improves.
# NOTE(review): this writes NCF_2.keras, while the load cell further down
# reads final_NCF_2.keras - confirm which file is the intended one.
checkpoint = ModelCheckpoint(
    filepath='/content/drive/MyDrive/CMPE256/CMPE256_Project/Models/NCF_2.keras',   # <- set save path
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    verbose=1
)

In [None]:
# Report the GPU TensorFlow sees, or fall back to "CPU" when there is none
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("CPU")
else:
    print('Default GPU Device: {}'.format(gpu_name))

CPU


In [None]:
# Load model
# NOTE: this rebinds `model`, replacing the network built and compiled above
# with one previously saved to Drive; everything below uses the loaded model.
from tensorflow import keras
path = '/content/drive/MyDrive/CMPE256/CMPE256_Project/Models/final_NCF_2.keras'
model = keras.models.load_model(path)

## UI

In [None]:
# user_id values in df are strings (the encoder cell below feeds it str(...)
# IDs), so the picked ID must be a string too: comparing the column with an
# int matches no rows and would leave already-rated businesses in the pool.
picked_user_id = '101074926318992653684'
rated_by_user = df[df['user_id'] == picked_user_id]['business_id'].unique()

# Find all items not rated by user
all_business = df['business_id'].unique()
business_not_rated = set(all_business) - set(rated_by_user)

In [None]:
# Create a list of user IDs with the same length as Business not rated,
# so each candidate business is paired with the picked user
user_ids_for_prediction = [str(picked_user_id)] * len(business_not_rated)

# Encode the user IDs and Business IDs with the encoders fitted on df.
# list(business_not_rated) fixes an iteration order for the set; the same
# unmodified set is listed again later when results are paired with IDs.
user_ids_encoded = user_encoder.transform(user_ids_for_prediction)
business_ids_encoded = business_encoder.transform(list(business_not_rated))

In [None]:
# Score every (picked user, unrated business) pair with the loaded model
predictions = model.predict([user_ids_encoded, business_ids_encoded])

[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step


In [None]:
# Inspect the raw model outputs (one row per candidate business).
# NOTE(review): values appear to be on the normalized 0-1 rating scale - confirm.
predictions

array([[0.7720078 ],
       [0.79658955],
       [0.98416764],
       ...,
       [0.6917234 ],
       [0.7303642 ],
       [0.93688065]], dtype=float32)

In [None]:
# Pair each candidate business ID with its predicted score; the list order
# matches the order used when the business IDs were encoded.
recommendations = pd.DataFrame({
    'business_not_rated': list(business_not_rated),
    'predicted_rating': predictions.flatten()
})

In [None]:
# Highest predicted rating first
recommendations = recommendations.sort_values(by=['predicted_rating'], ascending=False)

In [None]:
# Keep and display the 10 best-scoring candidates
top_10_recommendations = recommendations.head(10)
top_10_recommendations

Unnamed: 0,business_not_rated,predicted_rating
3440,6045bdb33ed2666ae7ac1c28,0.995024
17249,6047a339b1a0aaee3eef8ff2,0.995023
23549,604f2e1288c7af3f893e631b,0.995023
24451,60503ec60f8302d2f838685a,0.995023
20684,604faeb9fbfca9f34567205d,0.995023
1143,604b167d92f94faf15cc6932,0.995023
21708,604acfd53ebca1b150aa18af,0.995023
11227,604e274871ced5c7554ddac4,0.995023
20951,604f762e88c7af3f893e64eb,0.995022
7673,60424b50c6fcf1fddba1792d,0.995022


In [None]:
# Look at the reviews of one specific business and pick a sample row to inspect
filtered_df = df[df['business_id'] == "6050fa9f5b4ccec8d5cae994"]
# NOTE: despite the name, iloc[2] selects the third matching row, not the first
first_one = filtered_df.iloc[2]
first_one

Unnamed: 0,3152
business_id,6050fa9f5b4ccec8d5cae994
user_id,114937200137348359038
rating,4
review_text,Yams wasn't hot everything else was.
pics,[AF1QipNfxqjv6uNX153l8zi2JRfNfguy3zupfr9ysrHH]
history_reviews,[[114937200137348359038_604a64c33ebca1b150aa15...
user_id_encoded,23992
business_id_encoded,19273
rating_normalized,0.75


In [None]:
# Show the reviewer's history for the sampled row: a list of [id, review text] pairs
first_one['history_reviews']

[['114937200137348359038_604a64c33ebca1b150aa1542',
  'The shrimp tacos are a MUST try.'],
 ['114937200137348359038_604dbd3e2381ce29c9a1ea49',
  'This place is so overrated....I ordered a combo fajitas they gave me 3 shrimps 1 sliver of a green and red pepper each and 4 slices of chicken for 18 bucks. On top of the rice and beans on the same plate as the  fajitas on the hot griddle plate.'],
 ['114937200137348359038_6050ce10d8c08f462b93e026',
  'The chicken enchiladas was very tasteful. 4 stars because the Charro beans needed more seasoning. Salsa and Chips 2 baskets and 2 bowls of salsa need I say more.']]

In [None]:
import random

def predict_rating(user_id, business_id):
    """Placeholder scorer that does not use the trained model.

    Both arguments are ignored; the result is a float drawn uniformly at
    random from the 1-5 rating range.
    """
    lowest, highest = 1, 5
    return random.uniform(lowest, highest)

In [None]:
# Distinct business IDs present in the dataset
business_ids = df['business_id'].unique()

In [None]:
def recommend_restaurants(user_id, top_n=3):
    """Recommend businesses a user has not rated yet, ranked by predicted rating.

    Args:
        user_id: ID of the user to recommend for. A string or an int is
            accepted (e.g. text typed into the Gradio textbox); it is converted
            to a stripped string before being matched against ``df['user_id']``.
        top_n: Number of recommendations to return.

    Returns:
        A Markdown-formatted string with one entry per recommended business and
        its predicted rating rescaled to the 1-5 range, or a short message when
        the user ID is unknown or nothing is left to recommend.
    """
    # IDs in df are strings; normalize so ints and padded UI input still match.
    user_id = str(user_id).strip()

    user_rows = df[df['user_id'] == user_id]
    if user_rows.empty:
        # The encoder only knows IDs present in df and raises on anything else.
        return f"No reviews found for user ID `{user_id}`. Please check the ID and try again."

    # Candidates: every business this user has not rated, in a stable order.
    rated_by_user = set(user_rows['business_id'])
    business_not_rated = [
        business for business in df['business_id'].unique()
        if business not in rated_by_user
    ]
    if not business_not_rated:
        return "This user has already rated every business in the dataset."

    # Encode the requested user once (not the notebook-level picked_user_id)
    # and repeat the index so it pairs with every candidate business.
    encoded_user = user_encoder.transform([user_id])[0]
    user_ids_encoded = np.full(len(business_not_rated), encoded_user)
    business_ids_encoded = business_encoder.transform(business_not_rated)

    predictions = model.predict([user_ids_encoded, business_ids_encoded])

    recommendations = pd.DataFrame({
        'business_not_rated': business_not_rated,
        'predicted_rating': predictions.flatten()
    })
    top_n_recommendations = recommendations.sort_values(
        by=['predicted_rating'], ascending=False
    ).head(top_n)

    # Predictions are on the normalized 0-1 scale; *4+1 maps them back to 1-5.
    output = ""
    for row in top_n_recommendations.itertuples(index=False):
        output += f"   **Business ID:** {row.business_not_rated}\n"
        output += f"   - Predicted Rating: {row.predicted_rating*4+1:.2f}\n\n"

    return output

In [None]:
# Pass the ID as a string: df['user_id'] holds strings, so an int ID would
# never match any row when looking up what the user has already rated.
user_id = '101074926318992653684'
result = recommend_restaurants(user_id, top_n=3)
print(result)

[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
   **Business ID:** 6045bdb33ed2666ae7ac1c28
   - Predicted Rating: 4.98

   **Business ID:** 6047a339b1a0aaee3eef8ff2
   - Predicted Rating: 4.98

   **Business ID:** 604f2e1288c7af3f893e631b
   - Predicted Rating: 4.98




In [None]:
import gradio as gr

In [None]:
# Minimal Gradio UI: a textbox for the user ID, a button, and a Markdown
# area that shows the recommendations.
with gr.Blocks() as demo:
    gr.Markdown("New Tasty Recommender")
    gr.Markdown("Enter your **User ID** and we'll find your perfect restaurant match!")

    user_id_input = gr.Textbox(label="Your User ID")
    submit_btn = gr.Button("Get Recommendations")
    output_text = gr.Markdown()

    # The textbox value is passed as `user_id`; `top_n` keeps its default.
    submit_btn.click(
        fn=recommend_restaurants,
        inputs=[user_id_input],
        outputs=[output_text]
    )

# debug=True keeps this cell running (showing errors and logs) until interrupted
demo.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0951ac27de2e770d5b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://0951ac27de2e770d5b.gradio.live




In [None]:
# Shut down the Gradio server started by demo.launch()
demo.close()

Closing server running on port: 7860
