In [17]:
import json
import re
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

import os
import plaidml.keras
plaidml.keras.install_backend()
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

import xlearn as xl
import tensorflow.keras as keras
from tensorflow.keras.models import load_model

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 110)

In [9]:
class ReviewData():
    def __init__(self):
        self.review_df = pd.DataFrame()
        self.restaurant_df = pd.DataFrame()
        self.load_data()
    
    def load_data(self):
        df = pd.read_csv('review.csv',index_col='Unnamed: 0')
        self.review_df = df
        df = pd.read_csv('restaurant.csv',index_col='Unnamed: 0')
        self.restaurant_df = df

In [10]:
class PredictXLearn():
    def __init__(self, _data_object):
        self._user_id = ''
        self._xreviews = _data_object.review_df
        self._xrestaurants = _data_object.restaurant_df
        self._predict_df = pd.DataFrame()
        
        self.field_dict = {'Restaurant': '0', 'User': '1', 'Category': '2'}
        self.mapping_dict = {'useful': 'Restaurant', 'funny': 'Restaurant', 'cool':'Restaurant','review_count':'Restaurant',
                        'user_review_count': 'User', 'user_useful': 'User', 'user_funny':'User','user_cool':'User','fans':'User','average_stars':'User',
                        'categories': 'Category'}
        self.col_len_dict = {'useful':10,'funny':10,'cool':10,'review_count':10,'categories':88,'user_review_count':10,'user_useful':10,'user_funny':10,'user_cool':10,'fans':10,'average_stars':1}
        self.col_accum_index_dict = {}
        cumulative = 0
        for key, value in self.col_len_dict.items():
            self.col_accum_index_dict[key] = cumulative
            cumulative = cumulative + value
            
    def build_predict_df(self, _user_id):
        predict_df = self._xreviews[['useful','funny','cool','review_count','bid']]
        predict_df = predict_df.drop_duplicates(subset=['bid'])
        predict_user_df = self._xreviews[self._xreviews['uid']==self._user_id][['stars','user_review_count','user_useful','user_funny','user_cool','fans','average_stars','uid']]
        predict_user_df = predict_user_df.drop_duplicates(subset=['uid'])
        predict_df = pd.concat([predict_df, predict_user_df], axis=1)
        self._predict_df = predict_df
        
    def build_libffm(self, _user_id):
        self._user_id = _user_id
        self.build_predict_df(_user_id)
        with open('xlearn_dataset/predict_'+self._user_id+'.txt', 'w') as txt_file:
            predict_df = self._predict_df
            for idx, row in predict_df.iterrows():
                vec = []
                label = row['stars']
                vec.append(str(label))
                row = row.drop(labels=['stars','bid','uid','key'])
                for key, value in row.items():
                    if key == 'average_stars':
                        col_idx = self.col_accum_index_dict[key]
                        out_val = value
                        vec.append(self.field_dict[self.mapping_dict[key]] + ":" + str(col_idx) + ":" + str(out_val))
                    elif key == 'categories':
                        col_idx = self.col_accum_index_dict[key] - 1
                        out_val = 1
                        [vec.append(self.field_dict[self.mapping_dict[key]] + ":" + str(col_idx+n) + ":" + str(out_val)) for n in value if n >0]
                    else:
                        col_idx = self.col_accum_index_dict[key] + (int(value) - 1)
                        out_val = 1
                        vec.append(self.field_dict[self.mapping_dict[key]] + ":" + str(col_idx) + ":" + str(out_val))
                txt_file.write("%s\n" % " ".join(vec))
                
    def predict(self, num=10):
        fm_model = xl.create_fm()
        # Prediction task
        predict_path = 'xlearn_dataset/predict_'+self._user_id+'.txt'
        fm_model.setTest(predict_path)  # Set the path of test dataset
        # Start to predict
        # The output result will be stored in output.txt
        fm_model.predict('final_model/xLearn_v0.1.out', 'xlearn_dataset/output_'+self._user_id+'.txt')
        result = pd.read_csv('xlearn_dataset/output_'+self._user_id+'.txt', header=None, names=['final_stars'])        
        result = pd.concat([self._xrestaurants,result], axis=1).sort_values('final_stars', ascending=False)[:num]

        return result

In [11]:
a = ReviewData()
b = PredictXLearn(a)

In [65]:
b.build_libffm('F_N4NyoMJWzBYFspeYfH3A')
c = b.predict()
c

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories,attributes.GoodForKids,attributes.RestaurantsReservations,attributes.Caters,attributes.NoiseLevel,attributes.RestaurantsTableService,attributes.RestaurantsTakeOut,attributes.RestaurantsPriceRange2,attributes.OutdoorSeating,attributes.BikeParking,attributes.HasTV,attributes.WiFi,attributes.Alcohol,attributes.RestaurantsAttire,attributes.RestaurantsGoodForGroups,attributes.RestaurantsDelivery,attributes.GoodForMeal.dessert,attributes.GoodForMeal.latenight,attributes.GoodForMeal.lunch,attributes.GoodForMeal.dinner,attributes.GoodForMeal.brunch,attributes.GoodForMeal.breakfast,attributes.BusinessParking.garage,attributes.BusinessParking.street,attributes.BusinessParking.validated,attributes.BusinessParking.lot,attributes.BusinessParking.valet,attributes.Ambience.romantic,attributes.Ambience.intimate,attributes.Ambience.classy,attributes.Ambience.hipster,attributes.Ambience.divey,attributes.Ambience.touristy,attributes.Ambience.trendy,attributes.Ambience.upscale,attributes.Ambience.casual,attributes.BusinessAcceptsCreditCards,attributes.BusinessAcceptsBitcoin,attributes.ByAppointmentOnly,attributes.AcceptsInsurance,attributes.GoodForDancing,attributes.CoatCheck,attributes.HappyHour,attributes.Music.dj,attributes.Music.background_music,attributes.Music.no_music,attributes.Music.jukebox,attributes.Music.live,attributes.Music.video,attributes.Music.karaoke,attributes.BestNights.monday,attributes.BestNights.tuesday,attributes.BestNights.friday,attributes.BestNights.wednesday,attributes.BestNights.thursday,attributes.BestNights.sunday,attributes.BestNights.saturday,attributes.Ambience,attributes.BusinessParking,attributes.DogsAllowed,attributes.BYOBCorkage,attributes.DriveThru,attributes.Smoking,attributes.AgesAllowed,attributes.Corkage,attributes.BYOB,attributes.GoodForMeal,attributes.Music,attributes.DietaryRestrictions.dairy-free,attributes.DietaryRestrictions.gluten-free,attributes.DietaryRestrictions.vegan,attributes.DietaryRestrictions.kosher,attributes.DietaryRestrictions.halal,attributes.DietaryRestrictions.soy-free,attributes.DietaryRestrictions.vegetarian,attributes.BestNights,attributes.Open24Hours,attributes.DietaryRestrictions,attributes.RestaurantsCounterService,final_stars
2188,JFNv5sKeKAf1ZVPnfbzpXw,East West Pizzeria,5025 S Fort Apache Rd,Las Vegas,NV,89148.0,36.097968,-115.297796,4.0,153,1,"Pizza,Restaurants,Food",True,False,True,quiet,,True,2,False,True,True,free,none,casual,True,True,False,False,True,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.78508
4119,odQGs2Rv0nC0fHh4Gfp2iA,Ramen Noodle,"3350 Novat St, Ste 135",Las Vegas,NV,89129.0,36.221074,-115.328577,4.5,153,1,"Ramen,Sushi Bars,Restaurants,Noodles",True,True,,quiet,True,True,2,False,True,True,no,none,casual,True,True,True,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.78508
6349,Ih5Q1cNDLchhi1FqqTwrag,Turmeric Flavors of India,700 E Fremont St,Las Vegas,NV,89101.0,36.168374,-115.138119,4.5,153,1,"Restaurants,Bars",True,True,True,average,True,True,2,True,True,True,free,full_bar,casual,True,False,False,False,True,True,False,False,False,True,False,False,False,False,False,True,False,False,False,True,False,True,True,,,,False,False,True,False,False,False,False,False,False,False,False,False,True,False,True,False,True,,,False,,,outdoor,,,,,,,,,,,,,,,,,4.78508
6033,2Wl4o7l54I-EtxtaDpOTzA,Pub 365,255 E Flamingo Rd,Las Vegas,NV,89169.0,36.113105,-115.160233,4.0,153,1,"Bars,Restaurants,Beer Bar,Burgers,Salad,Pubs",False,True,True,average,True,True,2,False,True,True,free,full_bar,casual,True,False,False,True,True,True,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,True,True,,,,False,False,True,False,False,False,False,False,False,False,False,True,True,False,False,False,True,,,,,False,no,,,,,,,,,,,,,,,,,4.77967
8435,JuO9QpkdmbTS95GSXh4dZw,Phillips Seafood,"3570 Las Vegas Blvd, Ste 200, Caesars Palace",Las Vegas,NV,89109.0,36.116219,-115.174572,3.0,70,0,"Seafood,Restaurants",True,False,False,loud,False,True,2,False,False,False,no,beer_and_wine,casual,True,False,False,False,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.76299
1737,oE6gKgUfMd74m77d6hG9qg,Patty's Tamales,"5061 East Sahara Ave, Ste 1C",Las Vegas,NV,89142.0,36.143892,-115.06416,4.0,129,1,Restaurants,True,False,True,quiet,False,True,1,False,False,False,no,none,casual,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.76093
2138,vF7nFTv22qA-YnCP09pvHQ,Ron's Grilled Barbeque and Kabobs,"6085 S Fort Apache Rd, Ste 140",Las Vegas,NV,89148.0,36.07826,-115.298559,4.0,70,1,"Restaurants,Barbeque",True,False,True,average,True,True,1,True,True,True,free,none,casual,True,True,False,False,True,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,True,False,,,,,,,,,,,,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,,,4.70037
4869,NyOaA9zJ_59E8ch5HgHofw,Tropical Smoothie Cafe,"7580 South Las Vegas Boulevard, Suite # 100",Las Vegas,NV,89123.0,36.052027,-115.171547,3.0,153,1,"Restaurants,Sandwiches,Food,Juice Bars & Smoot...",,,False,,,True,1,,True,,free,,,,,,,,,,,False,False,False,False,False,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.64525
2851,gbKBMsduFZl_oBwahocNBg,Sin City Snoballs & Snacks,,Las Vegas,NV,89106.0,36.208509,-115.313391,4.0,25,0,"Food,Ice Cream & Frozen Yogurt,Restaurants,Don...",True,False,True,average,,True,1,True,True,False,,none,casual,True,False,,,,,,,True,True,False,False,True,False,False,False,False,False,False,False,False,True,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.62357
5670,G7yeF8yMra1_kymdxPVCWQ,24 Hour Soul Food Express,4855 Boulder Hwy,Las Vegas,NV,89121.0,36.117207,-115.088329,3.0,8,1,"Soul Food,Restaurants,Food",False,False,False,quiet,False,True,1,False,False,False,no,none,casual,False,True,,,,,,,,,,,,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.59151


In [19]:
class PredictDeepFM():
    def __init__(self, _data_object, n_max_category = 12, n_category = 88):
        self._user_id = ''
        self._xreviews = _data_object.review_df
        self._xrestaurants = _data_object.restaurant_df
        self._xdf2xy = pd.DataFrame()
        self._predict_df = pd.DataFrame()
        self._n_max_category = n_max_category
        self._n_category = n_category
        self._deep_fm_model = load_model('final_model/deepFM_v0.1.h5')
    
    def build_predict_df(self, _user_id):
        final_k = pd.DataFrame({'business_id':self._xreviews['business_id'].unique()})
        final_review = final_k.merge(self._xreviews[['business_id','name','bid','restaurant_stars','review_count','categories']], how='left',on='business_id')
        final_review = final_review.drop_duplicates(subset=['business_id'])
        final_user = self._xreviews[self._xreviews['uid']==_user_id][['user_id','user_review_count','user_useful','user_funny','user_cool','fans','average_stars','stars']]
        final_user = final_user.drop_duplicates(subset=['user_id'])
        final_df = pd.concat([final_review, final_user], axis=1)
        self._predict_df = final_df
        
    def build_df2xy(self, _user_id):
        self._user_id = _user_id
        self.build_predict_df(_user_id)
        print(self._predict_df )
        self._predict_df = [self._predict_df.restaurant_stars.values,
                       self._predict_df.review_count.values,
                       self._predict_df.average_stars.values,
                       self._predict_df.user_id.values, 
                       self._predict_df.business_id.values, 
                       np.concatenate(self._predict_df.categories.values).reshape(-1, self._n_max_category)]
        
    def predict(self, num=10):
        result = self._deep_fm_model.predict(self._predict_df)
        #result = pd.concat([self._xrestaurants,result], axis=1).sort_values('final_stars', ascending=False)[:num]
        return result

In [20]:
d = PredictDeepFM(a)



In [21]:
d.build_df2xy('F_N4NyoMJWzBYFspeYfH3A')

        business_id                                 name  \
0            3585.0           Carluccio's Tivoli Gardens   
5               NaN                                  NaN   
25           2916.0                         Hunk Mansion   
30           7806.0  Maria's Mexican Restaurant & Bakery   
97           6515.0                          Dairy Queen   
...             ...                                  ...   
540406       5538.0                 Great Steak & Potato   
540407       7022.0                     Jessie Rae's BBQ   
540591        976.0                 99 Cents Only Stores   
540600       4049.0                 Laguna Champagne Bar   
540633       8019.0                            Starbucks   

                           bid  restaurant_stars  review_count  \
0       PZ-LZzSlhSe9utkQYU8pFg               4.0          40.0   
5                          NaN               NaN           NaN   
25      KWywu2tTEPWmR9JnBc0WyQ               4.0         107.0   
30      tstimHo

ValueError: zero-dimensional arrays cannot be concatenated

In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler
scaler = RobustScaler()
deepfm_df['deepfm_final_score'] = scaler.fit_transform(deepfm_df[['deepfm_final_score']])
xlearn_df['xlearn_final_score'] = scaler.fit_transform(xlearn_df[['xlearn_final_score']])
final_df = final_df.merge(deepfm_df,on='business_id',how='left').merge(xlearn_df, on='business_id',how='left')
final_df['final_score'] = final_df['deepfm_final_score'] * 0.3 + final_df['xlearn_final_score'] * 0.7
final_df = final_df.sort_values('final_score',ascending=False)