In [3]:
import json
import re
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
from ast import literal_eval

import os
import plaidml.keras
plaidml.keras.install_backend()
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

import xlearn as xl
import tensorflow.keras as keras
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

# Widen the pandas display limits so the wide restaurant tables shown below are not truncated.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 110)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [7]:
class ReviewData():
    """Container for the three CSV tables used by the predictors.

    Attributes:
        review_df: contents of ``review.csv``; the ``categories`` column is
            parsed with ``ast.literal_eval`` while reading.
        restaurant_df: contents of ``restaurant.csv``.
        restaurant_whole_df: contents of ``restaurant_whole.csv``.
    """

    def __init__(self, data_dir=''):
        """Load all three tables.

        Args:
            data_dir: directory containing the CSV files. The default (empty
                string) reads them from the current working directory, exactly
                as the plain file names did before.
        """
        self._data_dir = data_dir
        self.review_df = pd.DataFrame()
        self.restaurant_df = pd.DataFrame()
        self.restaurant_whole_df = pd.DataFrame()
        self.load_data()

    def _read(self, file_name, **read_csv_kwargs):
        """Read one CSV from ``data_dir`` using its saved index column."""
        # os.path.join('', name) returns name unchanged, so the default
        # data_dir keeps the original relative paths.
        path = os.path.join(self._data_dir, file_name)
        return pd.read_csv(path, index_col='Unnamed: 0', **read_csv_kwargs)

    def load_data(self):
        """(Re)load the review, restaurant and full-restaurant tables."""
        # 'categories' is stored as the text of a Python literal; turn it back
        # into the original object while reading.
        self.review_df = self._read('review.csv', converters={'categories': literal_eval})
        self.restaurant_df = self._read('restaurant.csv')
        self.restaurant_whole_df = self._read('restaurant_whole.csv')

In [122]:
class PredictXLearn():
    """Score every restaurant for one user with a saved xLearn FM model.

    Workflow: ``build_libffm(user_id)`` writes a libffm-formatted text file
    (one line per restaurant), then ``predict()`` / ``simple_predict()`` run
    the saved model on that file and attach the scores to the restaurant table.
    """

    def __init__(self, _data_object, model_path='final_model/xLearn_v0.1.out', dataset_dir='xlearn_dataset'):
        """
        Args:
            _data_object: object exposing ``review_df`` and ``restaurant_df``.
            model_path: path of the saved xLearn model file.
            dataset_dir: directory (must already exist) where the libffm input
                and the prediction output files are written.
        """
        self._user_id = ''
        self._xreviews = _data_object.review_df
        self._xrestaurants = _data_object.restaurant_df
        self._predict_df = pd.DataFrame()
        self._model_path = model_path
        self._dataset_dir = dataset_dir

        # libffm field id for each feature group.
        self.field_dict = {'Restaurant': '0', 'User': '1', 'Category': '2'}
        # Feature column -> feature group.
        self.mapping_dict = {'useful': 'Restaurant', 'funny': 'Restaurant', 'cool':'Restaurant','review_count':'Restaurant',
                        'user_review_count': 'User', 'user_useful': 'User', 'user_funny':'User','user_cool':'User','fans':'User','average_stars':'User',
                        'categories': 'Category'}
        # Number of feature indices reserved for each column.
        self.col_len_dict = {'useful':10,'funny':10,'cool':10,'review_count':10,'categories':88,'user_review_count':10,'user_useful':10,'user_funny':10,'user_cool':10,'fans':10,'average_stars':1}
        # First feature index of each column: running sum of the widths above.
        # Relies on dicts preserving insertion order.
        self.col_accum_index_dict = {}
        cumulative = 0
        for key, value in self.col_len_dict.items():
            self.col_accum_index_dict[key] = cumulative
            cumulative = cumulative + value

    def _predict_path(self):
        """Path of the libffm input file for the current user."""
        return self._dataset_dir + '/predict_' + self._user_id + '.txt'

    def _output_path(self):
        """Path of the xLearn prediction output file for the current user."""
        return self._dataset_dir + '/output_' + self._user_id + '.txt'

    def build_predict_df(self, _user_id):
        """Cross-join every distinct restaurant with the given user's features.

        The result is stored in ``self._predict_df``.

        Raises:
            ValueError: if ``_user_id`` does not occur in the review table's
                ``uid`` column. Without this check the user columns would be
                all NaN and ``build_libffm`` would fail with
                "cannot convert float NaN to integer".
        """
        restaurant_cols = ['useful','funny','cool','review_count','bid']
        user_cols = ['stars','user_review_count','user_useful','user_funny','user_cool','fans','average_stars','uid']

        restaurant_part = self._xreviews[restaurant_cols].drop_duplicates(subset=['bid'])
        # Filter on the argument, not on self._user_id, so that this method
        # also works when called directly.
        user_rows = self._xreviews.loc[self._xreviews['uid'] == _user_id, user_cols]
        user_part = user_rows.drop_duplicates(subset=['uid'])
        if user_part.empty:
            raise ValueError("user id {!r} not found in review data".format(_user_id))

        # A constant join key turns the merge into a cross join; assign()
        # returns new frames, so no slice of the review table is modified.
        self._predict_df = restaurant_part.assign(key=0).merge(user_part.assign(key=0), how='outer', on='key')

    def build_libffm(self, _user_id):
        """Write ``<dataset_dir>/predict_<user_id>.txt`` in libffm format.

        Each line is ``label field:index:value ...``. The label is the
        ``stars`` value taken from the user's first review row.

        Raises:
            ValueError: if the user is unknown (see ``build_predict_df``).
        """
        # Build first so that an unknown user leaves the previous state intact.
        self.build_predict_df(_user_id)
        self._user_id = _user_id
        with open(self._predict_path(), 'w') as txt_file:
            for _, row in self._predict_df.iterrows():
                vec = [str(row['stars'])]
                features = row.drop(labels=['stars','bid','uid','key'])
                for key, value in features.items():
                    field = self.field_dict[self.mapping_dict[key]]
                    base = self.col_accum_index_dict[key]
                    if key == 'average_stars':
                        # Single slot carrying the raw value.
                        vec.append(field + ":" + str(base) + ":" + str(value))
                    elif key == 'categories':
                        # One indicator per positive category id n, at base - 1 + n.
                        vec.extend(field + ":" + str(base - 1 + n) + ":" + str(1) for n in value if n > 0)
                    else:
                        # Value v selects the indicator at offset v - 1 inside
                        # the column's slot range.
                        vec.append(field + ":" + str(base + (int(value) - 1)) + ":" + str(1))
                txt_file.write("%s\n" % " ".join(vec))

    def _score(self, column_name):
        """Run the saved model on the current user's libffm file.

        Returns a one-column DataFrame named ``column_name`` holding one
        prediction per line of the input file.
        """
        fm_model = xl.create_fm()
        fm_model.setTest(self._predict_path())  # Set the path of test dataset
        output_path = self._output_path()
        # xLearn writes the predictions to output_path, one per line.
        fm_model.predict(self._model_path, output_path)
        return pd.read_csv(output_path, header=None, names=[column_name])

    def predict(self, num=10):
        """Return the ``num`` highest-scoring rows of the restaurant table.

        Scores are attached as ``final_stars``. ``build_libffm`` must have
        been called first.
        """
        result = self._score('final_stars')
        # NOTE(review): concat aligns on the index, which assumes restaurant_df
        # rows are in the same order as the distinct 'bid' values of the review
        # table — confirm.
        result = pd.concat([self._xrestaurants,result], axis=1).sort_values('final_stars', ascending=False)[:num]

        return result

    def simple_predict(self):
        """Return ``business_id`` with the raw scores as ``xlearn_stars``.

        ``build_libffm`` must have been called first.
        """
        result = self._score('xlearn_stars')
        result = pd.concat([self._xrestaurants['business_id'],result], axis=1)

        return result

In [8]:
# Load the CSV tables once and build the xLearn predictor on top of them.
a = ReviewData()
b = PredictXLearn(a)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [9]:
# Write the libffm input file for one user, score it with the saved xLearn model
# and display the top rows (predict() defaults to num=10).
b.build_libffm('F_N4NyoMJWzBYFspeYfH3A')
c = b.predict()
c

Unnamed: 0,business_id,user_id,stars,useful,funny,cool,date,name,restaurant_stars,review_count,categories,user_review_count,user_useful,user_funny,user_cool,fans,average_stars,bid,uid
1,3585,16135,4.0,1,0,0,2010-10-06 18:20:13,Carluccio's Tivoli Gardens,4.0,40,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",21,32,9,5,1,2.86,PZ-LZzSlhSe9utkQYU8pFg,jYcf_e5p0UG0S-9gJq_tNA
2,3585,17440,5.0,1,0,0,2010-01-13 00:35:45,Carluccio's Tivoli Gardens,4.0,40,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",18,34,16,2,0,3.89,PZ-LZzSlhSe9utkQYU8pFg,nQC0JiPIk_jCooRDxpuw5A
3,3585,6281,3.0,1,0,0,2008-08-23 20:30:33,Carluccio's Tivoli Gardens,4.0,40,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",149,170,59,81,24,4.05,PZ-LZzSlhSe9utkQYU8pFg,Gv_-mtOKhWFtCjn9xFe0SQ
4,3585,18189,3.0,1,0,0,2009-06-01 20:02:55,Carluccio's Tivoli Gardens,4.0,40,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",59,45,10,12,2,3.8,PZ-LZzSlhSe9utkQYU8pFg,pabMYegF28KjHQ5hybAJ0A
5,3585,5806,4.0,0,0,0,2010-05-22 16:54:57,Carluccio's Tivoli Gardens,4.0,40,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",401,436,118,107,9,3.6,PZ-LZzSlhSe9utkQYU8pFg,F_N4NyoMJWzBYFspeYfH3A


In [134]:
class PredictDeepFM():
    """Score every restaurant for one user with a saved Keras DeepFM model.

    Workflow: ``build_df2xy(user_id)`` prepares the model inputs, then
    ``predict()`` / ``simple_predict()`` run the model and attach the scores
    to the restaurant table.
    """

    def __init__(self, _data_object, n_max_category = 12, n_category = 88, model_path = 'final_model/deepFM_v0.1.h5'):
        """
        Args:
            _data_object: object exposing ``review_df`` and ``restaurant_df``.
            n_max_category: number of category slots per restaurant; the
                ``categories`` lists are reshaped to this width.
            n_category: stored on the instance; not used by this class.
            model_path: path of the saved Keras model.
        """
        self._user_id = ''
        self._xreviews = _data_object.review_df
        self._xrestaurants = _data_object.restaurant_df
        # Model inputs (list of arrays) built by build_df2xy(); None until then.
        self._xdf2xy = None
        self._predict_df = pd.DataFrame()
        self._n_max_category = n_max_category
        self._n_category = n_category
        self._deep_fm_model = load_model(model_path)

    def build_predict_df(self, _user_id):
        """Cross-join every distinct restaurant with the given user's features.

        The result is stored in ``self._predict_df``.

        Raises:
            ValueError: if ``_user_id`` does not occur in the review table's
                ``uid`` column (the user columns would otherwise be all NaN).
        """
        restaurant_cols = ['business_id','name','bid','restaurant_stars','review_count','categories']
        user_cols = ['user_id','user_review_count','user_useful','user_funny','user_cool','fans','average_stars','stars']

        # First review row of each restaurant, in order of first appearance.
        # This yields the same rows as merging the unique ids back onto the
        # reviews and de-duplicating, without the extra merge.
        final_review = self._xreviews[restaurant_cols].drop_duplicates(subset=['business_id'])
        user_rows = self._xreviews.loc[self._xreviews['uid'] == _user_id, user_cols]
        final_user = user_rows.drop_duplicates(subset=['user_id'])
        if final_user.empty:
            raise ValueError("user id {!r} not found in review data".format(_user_id))

        # A constant join key turns the merge into a cross join; assign()
        # returns new frames, so no slice of the review table is modified.
        final_df = final_review.assign(key=0).merge(final_user.assign(key=0), how='outer', on='key')
        self._predict_df = final_df.drop(['key'],axis=1)

    def build_df2xy(self, _user_id):
        """Prepare the model inputs for ``_user_id``.

        ``self._predict_df`` keeps the joined DataFrame; the list of input
        arrays passed to the model is stored separately in ``self._xdf2xy``.

        Raises:
            ValueError: if the user is unknown (see ``build_predict_df``).
        """
        # Build first so that an unknown user leaves the previous state intact.
        self.build_predict_df(_user_id)
        self._user_id = _user_id
        frame = self._predict_df
        self._xdf2xy = [frame.restaurant_stars.values,
                        frame.review_count.values,
                        frame.average_stars.values,
                        frame.user_id.values,
                        frame.business_id.values,
                        # One row of n_max_category category ids per restaurant.
                        np.concatenate(frame.categories.values).reshape(-1,self._n_max_category)]

    def _score(self, column_name):
        """Run the model on the prepared inputs; one-column DataFrame of scores."""
        if self._xdf2xy is None:
            raise RuntimeError("call build_df2xy() before predicting")
        return pd.DataFrame(self._deep_fm_model.predict(self._xdf2xy), columns=[column_name])

    def predict(self, num=10):
        """Return the ``num`` highest-scoring rows of the restaurant table.

        Scores are attached as ``final_stars``.
        """
        result = self._score('final_stars')
        # NOTE(review): concat aligns on the index, which assumes restaurant_df
        # rows are in the same order as the distinct business ids of the review
        # table — confirm.
        result = pd.concat([self._xrestaurants,result], axis=1).sort_values('final_stars', ascending=False)[:num]
        return result

    def simple_predict(self):
        """Return ``business_id`` with the raw scores as ``deepfm_stars``."""
        result = self._score('deepfm_stars')
        result = pd.concat([self._xrestaurants['business_id'],result], axis=1)
        return result

In [135]:
# Build the DeepFM predictor on the same data; its constructor loads the saved Keras model.
d = PredictDeepFM(a)



In [136]:
# Prepare the DeepFM inputs for one user, score all restaurants and display
# the top rows (predict() defaults to num=10).
d.build_df2xy('F_N4NyoMJWzBYFspeYfH3A')
e = d.predict()
e

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories,attributes.GoodForKids,attributes.RestaurantsReservations,attributes.Caters,attributes.NoiseLevel,attributes.RestaurantsTableService,attributes.RestaurantsTakeOut,attributes.RestaurantsPriceRange2,attributes.OutdoorSeating,attributes.BikeParking,attributes.HasTV,attributes.WiFi,attributes.Alcohol,attributes.RestaurantsAttire,attributes.RestaurantsGoodForGroups,attributes.RestaurantsDelivery,attributes.GoodForMeal.dessert,attributes.GoodForMeal.latenight,attributes.GoodForMeal.lunch,attributes.GoodForMeal.dinner,attributes.GoodForMeal.brunch,attributes.GoodForMeal.breakfast,attributes.BusinessParking.garage,attributes.BusinessParking.street,attributes.BusinessParking.validated,attributes.BusinessParking.lot,attributes.BusinessParking.valet,attributes.Ambience.romantic,attributes.Ambience.intimate,attributes.Ambience.classy,attributes.Ambience.hipster,attributes.Ambience.divey,attributes.Ambience.touristy,attributes.Ambience.trendy,attributes.Ambience.upscale,attributes.Ambience.casual,attributes.BusinessAcceptsCreditCards,attributes.BusinessAcceptsBitcoin,attributes.ByAppointmentOnly,attributes.AcceptsInsurance,attributes.GoodForDancing,attributes.CoatCheck,attributes.HappyHour,attributes.Music.dj,attributes.Music.background_music,attributes.Music.no_music,attributes.Music.jukebox,attributes.Music.live,attributes.Music.video,attributes.Music.karaoke,attributes.BestNights.monday,attributes.BestNights.tuesday,attributes.BestNights.friday,attributes.BestNights.wednesday,attributes.BestNights.thursday,attributes.BestNights.sunday,attributes.BestNights.saturday,attributes.Ambience,attributes.BusinessParking,attributes.DogsAllowed,attributes.BYOBCorkage,attributes.DriveThru,attributes.Smoking,attributes.AgesAllowed,attributes.Corkage,attributes.BYOB,attributes.GoodForMeal,attributes.Music,attributes.DietaryRestrictions.dairy-free,attributes.DietaryRestriction
s.gluten-free,attributes.DietaryRestrictions.vegan,attributes.DietaryRestrictions.kosher,attributes.DietaryRestrictions.halal,attributes.DietaryRestrictions.soy-free,attributes.DietaryRestrictions.vegetarian,attributes.BestNights,attributes.Open24Hours,attributes.DietaryRestrictions,attributes.RestaurantsCounterService,final_stars
3334,wMqu9LseUcq7bqWWY-3oTQ,Jamba Juice,10830 W Charleston Blvd,Las Vegas,NV,89135.0,36.161244,-115.3316,3.5,27,1,"Food,Juice Bars & Smoothies",,,True,,,True,2,,True,,free,,,,,,,,,,,False,False,False,False,False,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.011297
7028,kpC__sWtWkLdSOI2xxdirg,Paddy's Pub,"4160 S Pecos Rd, Ste 1",Las Vegas,NV,89121.0,36.113653,-115.100011,3.0,6,1,"Bars,Pubs",,False,,average,,,2,False,False,True,free,full_bar,,True,,,,,,,,False,False,False,True,False,False,False,False,False,False,False,False,False,True,True,False,,,,False,True,,,,,,,,,,,,,,,,,False,,,yes,,,,,,,,,,,,,,,,,6.009379
5881,yUzII0aClaBc7K7kKY6bJQ,Village Meat & Wine,"5025 S Eastern Ave, Ste 23",Las Vegas,NV,89119.0,36.098107,-115.119846,4.5,41,1,"Beer,Wine & Spirits,Food,Specialty Food",,,,,,False,2,,True,,,,,,,,,,,,,False,False,False,True,False,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.89732
2854,zpXTbidP23E8Yaxyh8c4HA,Port of Subs,4412 N Rancho Dr,Las Vegas,NV,89130.0,36.239972,-115.230348,4.0,8,0,"Restaurants,Sandwiches",True,False,True,quiet,,True,1,False,,False,free,none,casual,True,False,,,,,,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.867804
4972,y2plF1wqtEiOdzArakENfg,TangTangTang 2,"8635 S Rainbow Blvd, Ste 110",Las Vegas,NV,89139.0,36.03172,-115.244304,4.5,77,1,"Barbeque,Restaurants,Soup",True,True,False,,True,True,1,False,,False,no,beer_and_wine,casual,True,False,False,False,True,True,True,False,False,False,False,False,False,False,True,True,True,False,False,True,False,True,True,,,,,,,,,,,,,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,,,5.837663
3945,hb2u9dUQkbWYIHhXCiFsrg,Mama Ilardo's Pizzeria,3799 Las Vegas Blvd S,Las Vegas,NV,89109.0,36.102916,-115.169755,3.5,10,0,"Pizza,Restaurants",True,False,,,,True,1,False,,False,no,beer_and_wine,casual,True,False,,,,,,,False,False,False,False,False,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,yes_free,,,,,,,,,,,,,,,,,,,5.822811
3403,nxzqOt8sABsgf53c4CgMqg,Beso,"Crystal-City Ctr, 3720 Las Vegas Blvd S",Las Vegas,NV,89118.0,36.08082,-115.22273,3.5,47,0,"Bars,Steakhouses,Restaurants",False,True,False,average,,False,4,True,,True,no,full_bar,dressy,True,False,,,,,,,True,False,False,False,True,False,False,True,False,False,False,False,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.819855
2120,tveb-DkZ0lnwgKb_oavl6A,Smith's Food & Drug Centers,7130 N Durango Dr,Las Vegas,NV,89149.0,36.290648,-115.285859,3.5,116,1,Food,,,False,,,True,2,,True,,,beer_and_wine,,,False,,,,,,,False,False,False,True,False,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.818458
5047,h84tU6REsM1VzyUCP0Ss7Q,Baja Fresh,"9310 S Eastern Ave, Ste 101",Las Vegas,NV,89123.0,36.020108,-115.116774,3.0,44,0,Restaurants,True,False,True,average,,True,1,True,True,True,no,none,casual,True,False,,,,,,,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.770861
658,mFRfXEWf2KgRolCEj2lDUw,GFW Coffee,"4545 Spring Mountain Rd, Ste 103",Las Vegas,NV,89102.0,36.126289,-115.203241,3.5,20,0,"Food,Coffee & Tea",,,,,,,1,True,False,,free,,,,,,,,,,,False,False,False,True,False,,,,,,,,,,True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.758111


In [152]:
class Enssenble():
    """Blend the xLearn and DeepFM scores into one restaurant ranking.

    The class name's spelling is kept as-is because existing cells refer to it.
    """

    def __init__(self, _data_obj):
        """
        Args:
            _data_obj: object exposing ``review_df``, ``restaurant_df`` and
                ``restaurant_whole_df``; it is shared by both predictors.
        """
        self._data_object = _data_obj
        self._xlearn = PredictXLearn(self._data_object)
        self._deepfm = PredictDeepFM(self._data_object)

    def predict(self, _user_id, _num = 10, deepfm_weight = 0.5, xlearn_weight = 0.5):
        """Return the ``_num`` restaurants with the highest blended score.

        Args:
            _user_id: user to score for; forwarded to both predictors.
            _num: number of rows to return.
            deepfm_weight: weight of the rescaled DeepFM score.
            xlearn_weight: weight of the rescaled xLearn score.

        Returns:
            Rows of ``restaurant_whole_df`` with ``xlearn_stars``,
            ``deepfm_stars`` (each min-max rescaled to [1, 10]) and
            ``final_stars`` (their weighted sum), sorted by ``final_stars``
            in descending order.
        """
        self._xlearn.build_libffm(_user_id)
        self._deepfm.build_df2xy(_user_id)

        xlearn = self._xlearn.simple_predict()
        deepfm = self._deepfm.simple_predict()

        # The merges return new frames, so restaurant_whole_df is not modified.
        result = self._data_object.restaurant_whole_df
        result = result.merge(xlearn, on='business_id', how='left').merge(deepfm, on='business_id', how='left')

        # The two models score on different ranges; put both on a common
        # 1..10 scale before blending. The scaler is refitted per column.
        scaler = MinMaxScaler(feature_range=(1,10))
        for column in ('deepfm_stars', 'xlearn_stars'):
            result[column] = scaler.fit_transform(result[[column]])

        result['final_stars'] = result['deepfm_stars']*deepfm_weight + result['xlearn_stars']*xlearn_weight
        result = result.sort_values('final_stars', ascending = False)[:_num]

        return result

In [126]:
# Summary statistics of the raw xLearn scores for the user last passed to b.build_libffm().
b.simple_predict().describe()

Unnamed: 0,xlearn_stars
count,8655.0
mean,3.977247
std,0.169283
min,3.01486
25%,3.89879
50%,3.99144
75%,4.06145
max,4.78508


In [137]:
# Summary statistics of the raw DeepFM scores for the user last passed to d.build_df2xy().
d.simple_predict().describe()

Unnamed: 0,deepfm_stars
count,8655.0
mean,3.003921
std,1.032167
min,-1.253405
25%,2.439859
50%,2.972907
75%,3.725081
max,6.011297


In [153]:
# Reload the CSV tables (rebinding `a`) and build the ensemble, which creates its own two predictors.
a = ReviewData()
f = Enssenble(a)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [155]:
# Blended top-10 for one user.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: cannot convert float NaN to integer" — presumably this id is
# missing from review_df['uid']; confirm.
g = f.predict('gEBRecs8QUw9kxlbqIN_sA')
g

ValueError: cannot convert float NaN to integer

In [None]:
# NOTE(review): scratch cell — deepfm_df, xlearn_df and final_df are not defined
# in the visible part of this notebook, so this cell cannot run as-is.
# It robust-scales each model's score column, left-joins both onto final_df by
# business_id, blends them 0.3 (DeepFM) / 0.7 (xLearn) and sorts by the blend.
from sklearn.preprocessing import StandardScaler, RobustScaler
scaler = RobustScaler()
deepfm_df['deepfm_final_score'] = scaler.fit_transform(deepfm_df[['deepfm_final_score']])
xlearn_df['xlearn_final_score'] = scaler.fit_transform(xlearn_df[['xlearn_final_score']])
final_df = final_df.merge(deepfm_df,on='business_id',how='left').merge(xlearn_df, on='business_id',how='left')
final_df['final_score'] = final_df['deepfm_final_score'] * 0.3 + final_df['xlearn_final_score'] * 0.7
final_df = final_df.sort_values('final_score',ascending=False)

In [None]:
# Check whether this id occurs in the review table's 'uid' column (the column both predictors filter on).
'F_N4NyoMJWzBYFspeYfH3A' in a.review_df['uid'].values