In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
#import cv2

import tensorflow as tf
from tensorflow.keras.preprocessing import image
from scipy.sparse.linalg import svds

In [2]:
from src.models import (load_data
                        , data_summary
                        , combine_tables
                        , collab_mat
                        , svd_mat
                       )

In [3]:
# Unpack the six tables returned by the project loader, in the order the
# loader yields them (presumably business, review, check-in, photo, tip and
# user data, judging by the names -- confirm against src.models.load_data).
(
    biz_df,
    rev_df,
    ckin_df,
    pho_df,
    tip_df,
    user_df,
) = load_data()

In [4]:
# Quick sanity check: summarise the name, row count and column count of
# each raw table before any joins are made.
data_summary(
    biz_df,
    rev_df,
    ckin_df,
    pho_df,
    tip_df,
    user_df,
)

name,rows,colums
business,192609,14
review,6685900,9
checkin,161950,2
photo,200000,4
tip,1223094,5
user,1637138,22


In [5]:
# Combine the user, review and business tables into a single frame via the
# project helper (the join keys/logic live in src.models.combine_tables).
user_rev_biz = combine_tables(user_df, rev_df, biz_df)

In [10]:
# Five cities with the most rows in the combined table
# (value_counts sorts by descending frequency by default).
user_rev_biz['city'].value_counts().head(5)

Las Vegas     2030798
Phoenix        734136
Toronto        525437
Scottsdale     387550
Charlotte      309425
Name: city, dtype: int64

In [11]:
# Build the collaborative matrix for a single city using the project helper.
# NOTE(review): presumably a user x business ratings pivot restricted to
# 'Scottsdale' -- confirm against src.models.collab_mat.
user_biz_collab_mat = collab_mat('Scottsdale', user_rev_biz)

In [12]:
# (rows, columns) of the collaborative matrix; its index and columns are what
# this notebook later stores as user_id_list and biz_id_list respectively.
user_biz_collab_mat.shape

(153562, 8837)

In [31]:
# Row labels of the collaborative matrix as a NumPy array, so a positional
# row index in the prediction matrix can be mapped back to a user id.
user_id_list = np.array(user_biz_collab_mat.index)

In [76]:
# Number of row labels; should equal the first dimension of the matrix shape.
len(user_id_list)

153562

In [74]:
# Display name for one user id: select that user's rows and the name column
# in a single .loc call, then take the first distinct value.
user_rev_biz.loc[
    user_rev_biz['user_id'] == '---PLwSf5gKdIoVnyRHgBA',
    'user_name',
].unique()[0]

'Rae'

In [32]:
# Column labels of the collaborative matrix as a NumPy array, so a positional
# column index in the prediction matrix can be mapped back to a business id.
biz_id_list = np.array(user_biz_collab_mat.columns)

In [75]:
# Number of column labels; should equal the second dimension of the matrix shape.
len(biz_id_list)

8837

## SVD

In [13]:
# Run the project's SVD helper on the collaborative matrix with k=10.
# NOTE(review): presumably k is the number of singular values/latent factors
# kept and the result is the low-rank reconstruction used as predicted
# scores -- confirm against src.models.svd_mat.
user_biz_predictions = svd_mat(user_biz_collab_mat, k=10)

In [14]:
# The prediction matrix should have the same shape as the collaborative
# matrix so that user_id_list / biz_id_list positions still line up.
user_biz_predictions.shape

(153562, 8837)

## Predictions Output

In [77]:
# Column positions of the five highest predicted scores for the first row
# (row 0), ordered from highest to lowest score.
first_row_scores = user_biz_predictions[0]
biz = first_row_scores.argsort()[::-1][:5]

In [78]:
# Positional column indices of the top-scoring entries for row 0.
biz

array([4494, 1126, 1879, 4866, 1683])

In [79]:
# Translate the positional column indices into business ids.
biz_id_list[biz]

array(['VLDFjeqpUgWhnVuB_8GuEg', '6nKR80xEGHYf2UxAe_Cu_g',
       'C8D_GU9cDDjbOJfCaGXxDQ', 'YSYDJTCt5kzY2kKnLNGRiA',
       'ArmAaGCdCVAlkhdtaT6RzQ'], dtype=object)

In [82]:
# Business name for one business id: select that business's rows and the
# name column in a single .loc call, then take the first distinct value.
user_rev_biz.loc[
    user_rev_biz['business_id'] == 'VLDFjeqpUgWhnVuB_8GuEg',
    'biz_name',
].unique()[0]

'True Food Kitchen'

In [81]:
# List the columns available in the combined user/review/business frame.
user_rev_biz.columns

Index(['user_id', 'user_name', 'user_review_count', 'yelping_since',
       'useful_user_sent', 'funny_user_sent', 'cool_user_sent', 'elite',
       'friends', 'fans', 'average_stars', 'compliment_hot', 'compliment_more',
       'compliment_profile', 'compliment_cute', 'compliment_list',
       'compliment_note', 'compliment_plain', 'compliment_cool',
       'compliment_funny', 'compliment_writer', 'compliment_photos',
       'review_id', 'business_id', 'stars_rev', 'useful_rev', 'funny_rev',
       'cool_rev', 'rev_text', 'date', 'biz_name', 'address', 'city', 'state',
       'postal_code', 'latitude', 'longitude', 'biz_star', 'biz_review_count',
       'is_open', 'attributes', 'categories', 'hours'],
      dtype='object')

In [26]:
# User id that labels row 0 of the collaborative/prediction matrices.
user_id_list[0]

'---PLwSf5gKdIoVnyRHgBA'

In [42]:
# Positional row of a given user id: argwhere returns one [index] row per
# match, so element [0, 0] is the position of the first match.
row_matches = np.argwhere(user_id_list == '---PLwSf5gKdIoVnyRHgBA')
row_matches[0, 0]

0

In [87]:
def top_biz(name_id, n=5):
    """Return a user's name and the names of their top-``n`` predicted businesses.

    Relies on notebook-level state: ``user_rev_biz`` (combined review frame),
    ``user_id_list`` / ``biz_id_list`` (row / column labels of the
    collaborative matrix) and ``user_biz_predictions`` (score matrix whose
    rows and columns are in the same order as those label arrays).

    Parameters
    ----------
    name_id : str
        ``user_id`` of the user to produce recommendations for.
    n : int, default 5
        Number of businesses to return. Values <= 0 yield an empty list.

    Returns
    -------
    tuple
        ``(user_name, business_names)`` where ``business_names`` is ordered
        from highest to lowest predicted score.

    Raises
    ------
    IndexError
        If ``name_id`` does not occur in ``user_rev_biz`` at all.
    ValueError
        If ``name_id`` has no row in the prediction matrix (i.e. it is not in
        ``user_id_list``), so no scores exist for that user.
    """
    # First distinct name recorded for this user id.
    name = user_rev_biz.loc[user_rev_biz['user_id'] == name_id, 'user_name'].unique()[0]

    # Locate this user's own row; previously row 0 was always used, so every
    # user received the first user's recommendations.
    row_hits = np.flatnonzero(user_id_list == name_id)
    if row_hits.size == 0:
        raise ValueError(
            f"user_id {name_id!r} has no row in the prediction matrix"
        )
    scores = user_biz_predictions[row_hits[0]]

    # Reverse first, then truncate: unlike ``[-n:]`` this returns nothing
    # (rather than everything) when n == 0.
    top_cols = scores.argsort()[::-1][:max(n, 0)]
    top_ids = biz_id_list[top_cols]

    # Resolve every business name with one pass over the frame instead of one
    # full boolean scan per business; keeping the first row per business_id
    # matches the previous ``.unique()[0]`` choice.
    hits = user_rev_biz.loc[
        user_rev_biz['business_id'].isin(top_ids),
        ['business_id', 'biz_name'],
    ]
    name_by_id = hits.drop_duplicates('business_id').set_index('business_id')['biz_name']

    top_biz_list = [name_by_id[biz_id] for biz_id in top_ids]
    return name, top_biz_list

In [89]:
# Top ten recommended business names for a sample user.
top_biz(name_id='---PLwSf5gKdIoVnyRHgBA', n=10)

('Rae',
 ['True Food Kitchen',
  'The Thumb BBQ',
  'Culinary Dropout',
  'SOL Mexican Cocina',
  'Hopdoddy Burger Bar',
  "Coconut's Fish Cafe",
  'North Italia',
  'Salty Sow',
  'Citizen Public House',
  'Fogo de Chão Brazilian Steakhouse'])

In [90]:
# Businesses this sample user actually has rows for in the combined table,
# for comparison against the recommendations.
user_rev_biz.loc[
    user_rev_biz['user_id'] == '---PLwSf5gKdIoVnyRHgBA',
    'biz_name',
]

1383842      iPic Theaters
5233139    Whimsical Nails
Name: biz_name, dtype: object

In [None]:
# NOTE(review): unexecuted scratch cell that displays the whole is_open
# column; consider .value_counts() or removing it before sharing.
user_rev_biz['is_open']