In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tabulate

from rec_sys import build_user_item_networks, build_model, scale_data, predict_ratings
from utils import load_data, encode_features, get_restaurant_names_cuisines
from constants import *


### 1. Load training data into matrices

In [2]:
# Load the user/restaurant matrices and the rating targets.
user_data, user_vectors, restaurant_data, restaurant_vectors, y_data, num_users, num_restaurants = load_data()

# Scale the ratings with the 'minmax' option of scale_data. The returned
# y_scaler is kept because it is passed to predict_ratings later on.
y_data, y_scaler = scale_data(y_data.reshape(-1, 1), 'minmax')

# Split all three arrays in ONE call: a single shuffle is applied to every
# array, so row i of each split always belongs to the same rating, and
# sklearn checks that the arrays have matching lengths.
(
    user_train, user_test,
    restaurant_train, restaurant_test,
    y_train, y_test,
) = train_test_split(
    user_data, restaurant_data, y_data,
    train_size=0.8, shuffle=True, random_state=1,
)

print(f"user_train: {user_train.shape}, user_test: {user_test.shape}")
print(f"restaurant_train: {restaurant_train.shape}, restaurant_test: {restaurant_test.shape}")
print(f"y_train: {y_train.shape}, y_test: {y_test.shape}")
print(f"num_users: {num_users}")
print(f"num_restaurants: {num_restaurants}")


user_train: (928, 12), user_test: (233, 12)
restaurant_train: (928, 8), restaurant_test: (233, 8)
y_train: (928, 1), y_test: (233, 1)
num_users: 138
num_restaurants: 130


### 2. Build neural networks

In [3]:
# Units of each Dense layer, handed to build_user_item_networks, which
# returns the user network and the item (restaurant) network.
layer_units = [
    64, # Dense, relu
    32, # Dense, relu
    16  # Dense, linear
]

user_NN, item_NN = build_user_item_networks(layer_units)

# The first column of each matrix is an id, not a feature, so it is not
# counted in the input width.
num_user_features = user_data.shape[1] - 1 # subtract 1 for id column
num_restaurant_features = restaurant_data.shape[1] - 1

model = build_model(user_NN, item_NN, num_user_features, num_restaurant_features)

# Model.summary() prints the table itself and returns None, so wrapping it
# in print() would emit an extra "None" line after the table.
model.summary()




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 11)]                 0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 7)]                  0         []                            
                                                                                                  
 sequential (Sequential)     (None, 16)                   3376      ['input_1[0][0]']             
                                                                                                  
 sequential_1 (Sequential)   (None, 16)                   3120      ['input_2[0][0]']             
                                                                                              

### 3. Train the model

In [4]:
# Fit on the feature columns only: column 0 of each matrix holds the id.
# The History object returned by fit() is kept so the per-epoch metrics stay
# available afterwards, instead of its repr being echoed as the cell output.
train_inputs = [user_train[:, 1:], restaurant_train[:, 1:]]
history = model.fit(train_inputs, y_train, epochs=100)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x2862fae90>

### 4. Evaluate model on test data

In [5]:
# Score the held-out split. As in training, the id column (column 0) is
# dropped from both feature matrices before they are fed to the model.
held_out_inputs = [user_test[:, 1:], restaurant_test[:, 1:]]
model.evaluate(held_out_inputs, y_test)




0.13830821216106415

### 5. Test on a new user

In [6]:
# Profile of a hand-written user used to try the model out.
# 'n' stands for 'new'
n_id = 5000
n_latitude = 37.7749
# NOTE(review): 37.7749 / 122.4194 look like San Francisco coordinates, which
# lie in the western hemisphere, so the longitude is written as negative here
# -- confirm. The value is not part of the feature vector built below.
n_longitude = -122.4194
n_smoker = "false"
n_drink_level = "abstemious"
n_dress_preference = "informal"
n_ambience = "?"  # presumably the marker for an unknown value -- TODO confirm
n_transport = "car owner"
n_marital_status = "single"
n_hijos = "independent"
n_birth_year = 2004
n_interest = "technology"
n_personality = "hard-worker"
n_religion = "none"
n_activity = "student"
n_weight = 61
n_budget = "medium"
n_height = 1.778
n_cuisines = ["Breakfast-Brunch", "Chinese", "Greek", "Indian-Pakistani", "Thai"]

# Values are matched to USER_FEATURES by position, so the order of this list
# must follow the order of USER_FEATURES. Commented-out entries are attributes
# defined above that are deliberately left out of the feature vector.
new_user_features = pd.Series(
    [
        n_id,
        # n_latitude,
        # n_longitude,
        n_smoker,
        n_drink_level,
        n_dress_preference,
        n_ambience,
        n_transport,
        n_marital_status,
        n_hijos,
        # n_birth_year,
        n_interest,
        n_personality,
        # n_religion,
        n_activity,
        # n_weight,
        n_budget,
        # n_height
    ],
    index=USER_FEATURES
)

# Wrap the profile in a one-row DataFrame, encode it, and take that single
# row by position so the lookup does not depend on the row label being 0.
new_user_frame = pd.DataFrame([new_user_features])
new_user_encoded = encode_features(new_user_frame, USER_FEATURE_OPTIONS)
new_user_arr = np.array(new_user_encoded.iloc[0])


In [7]:
# Create as many copies of the user's data as there are restaurants
new_user_matrix = np.tile(new_user_arr, (num_restaurants, 1))

# Run prediction (column 0 of both matrices is the id and is dropped)
new_user_pred = predict_ratings(model, new_user_matrix[:, 1:], restaurant_vectors[:, 1:], y_scaler)

# Order the restaurants from highest to lowest predicted rating
sorted_indices = np.argsort(-new_user_pred)
new_user_pred = new_user_pred[sorted_indices]
new_user_restaurants = restaurant_vectors[sorted_indices]
names, cuisines = get_restaurant_names_cuisines(new_user_restaurants[:, 0].astype(int))

# Build the table: a header row followed by one row per restaurant.
# The list is called `table_rows` rather than `display` so that it does not
# shadow IPython's built-in display() function for the rest of the notebook.
table_rows = [["Rating Prediction", "Place ID", "Name", "Cuisine"]]
table_rows.extend(
    [new_user_pred[i], new_user_restaurants[i, 0], names[i], cuisines[i]]
    for i in range(len(new_user_pred))
)
tabulate.tabulate(table_rows, tablefmt='html', headers="firstrow")




Rating Prediction,Place ID,Name,Cuisine
1.9267,132768,Mariscos Tia Licha,Family
1.91476,132847,don burguers,?
1.91405,132955,emilianos,Bar_Pub_Brewery
1.91216,135030,Preambulo Wifi Zone Cafe,?
1.9076,132958,tacos los volcanes,American
1.89992,132755,La Estrella de Dimas,Mexican
1.89562,132862,La Posada del Virrey,International
1.89055,132613,carnitas_mata,Mexican
1.88083,132715,tacos de la estacion,Mexican
1.88029,132846,el lechon potosino,International
