In [293]:
import numpy as np
from numpy import random as npr


class ResponseFunction:
    def __init__(self, heu_matrix, nn_matrix):
        assert heu_matrix.shape == nn_matrix.shape
        self._heu_matrix = heu_matrix
        self._nn_matrix = nn_matrix
        
    def __call__(self, a1: float, a2: float):
        assert a1 >= 0.0
        assert a2 >= 0.0
        assert a1 + a2 <= 1.0
        a3 = 1 - a1 - a2
        return (
            a1 * self._heu_matrix
            + a2 * self._nn_matrix
            + a3 * npr.normal(0, 1)
        )
    


In [4]:
import pandas as pd

In [24]:

class SimilarityModel:
    def __init__(self, sim_matrix):
        self._sim_matrix = sim_matrix
    
    def predict_rating(self, user, item):
        user_id = user[0]
        item_id = item[0]
        return self._sim_matrix[user_id][item_id] * 2
    
    def pred(self, user_df, item_df):
        user_ids = user_df["user_id"]
        item_ids = user_df["item_id"]
    


In [272]:
users_df = pd.read_csv("../data/restaurant_data_reformatted/users.csv")
restaurants_df = pd.read_csv("../data/restaurant_data_reformatted/restaurants.csv")

In [273]:
drop_cols = ['userID',
 'smoker',
 'drink_level',
 'dress_preference',
 'ambience',
 'transport',
 'marital_status',
 'hijos',
 'birth_year',
 'interest',
 'personality',
 'religion',
 'activity',
 'color',
 'weight',
 'budget',
 'height',
]

In [274]:
users_df.drop(drop_cols, axis=1)

Unnamed: 0,Afghan,African,American,Armenian,Asian,Australian,Austrian,Bagels,Bakery,Bar,...,Swiss,Tapas,Tea_House,Tex-Mex,Thai,Tibetan,Tunisian,Turkish,Vegetarian,Vietnamese
0,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
134,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
135,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
136,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [275]:
users_food_pref_df = users_df[restaurants_df.drop("placeID", axis=1).columns]

In [276]:
from sklearn.metrics.pairwise import cosine_similarity

In [277]:
_heu_matrix = cosine_similarity(users_food_pref_df, restaurants_df.drop("placeID", axis=1))

In [280]:
_heu_matrix.shape

(138, 769)

## DeepFM

In [15]:
import pandas as pd
import torch
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names

data = pd.read_csv('../data/restaurant_data_reformatted/synthetic_data.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,rating,food_rating,service_rating,smoker,drink_level,dress_preference,ambience,transport,marital_status,...,Soup_y,Southern_y,Southwestern_y,Spanish_y,Steaks_y,Sushi_y,Thai_y,Turkish_y,Vegetarian_y,Vietnamese_y
0,0,1,1,2,False,social drinker,formal,family,public,single,...,0,0,0,0,0,0,0,0,0,0
1,1,1,2,1,False,social drinker,elegant,family,public,single,...,0,0,0,0,0,0,0,0,0,0
2,2,2,1,1,False,social drinker,elegant,family,public,single,...,0,0,0,0,0,0,0,0,0,0
3,3,2,1,2,False,social drinker,elegant,family,car owner,single,...,0,0,0,0,0,0,0,0,0,0
4,4,2,1,1,False,social drinker,elegant,family,public,single,...,0,0,0,0,0,0,0,0,0,0


In [16]:
data.columns

Index(['Unnamed: 0', 'rating', 'food_rating', 'service_rating', 'smoker',
       'drink_level', 'dress_preference', 'ambience', 'transport',
       'marital_status',
       ...
       'Soup_y', 'Southern_y', 'Southwestern_y', 'Spanish_y', 'Steaks_y',
       'Sushi_y', 'Thai_y', 'Turkish_y', 'Vegetarian_y', 'Vietnamese_y'],
      dtype='object', length=182)

## DeepFM train

In [141]:
class DeepFMDataLoader:
    def __init__(self, sparse_features, dense_features):
        self._sparse_feats = sparse_feats
        self._dense_feats = dense_feats
        
    def load(self, dataset):
        nn_input = pd.DataFrame()
        nn_input[self._sparse_feats] = dataset[self._sparse_feats]
        nn_input[self._dense_feats] = dataset[self._dense_feats]
        
        for feat in self._sparse_feats:
            encoder = LabelEncoder()
            nn_input[feat] = encoder.fit_transform(nn_input[feat])
            
        mms = MinMaxScaler(feature_range=(0,1))
        nn_input[self._dense_feats] = mms.fit_transform(nn_input[self._dense_feats])
        
        # problems may be here
        sparse_feature_columns = [
            SparseFeat(feat, vocabulary_size=nn_input[feat].nunique(), embedding_dim=4) 
            for i, feat in enumerate(self._sparse_feats)
        ]

        dense_feature_columns = [DenseFeat(feat, 1,) for feat in self._dense_feats]
        
        dnn_feat_cols = sparse_feature_columns + dense_feature_columns
        linear_feat_cols = sparse_feature_columns + dense_feature_columns
        
        feat_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
        return nn_input, dnn_feat_cols, linear_feat_cols, feat_names

            

In [190]:
from deepctr_torch.models import DeepFM
from torch.nn.functional import cross_entropy


class DeepFmModel:
    def __init__(self, linear_feature_columns, dnn_feature_columns, feature_names):
        self._linear_feature_columns = linear_feature_columns
        self._dnn_feature_columns = dnn_feature_columns
        self._feature_names = feature_names
        self._deepfm = DeepFM(
            self._linear_feature_columns,
            self._dnn_feature_columns,
            task='multiclass',
            device='cpu'
        )
        self._deepfm.compile("adam", "mse", metrics=['mse'], )
        
    def train(self, train_set, target_values):
        train_model_input = {n: train_set[n] for n in self._feature_names}
        history = self._deepfm.fit(
            train_model_input,
            target_values,
            batch_size=256,
            epochs=10,
            verbose=2,
            validation_split=0.2
        )

        return history

    def predict(self, test_set):
        test_model_input = {n: test_set[n] for n in self._feature_names}
        result = self._deepfm.predict(test_model_input, batch_size=256)
        return result

    #TODO: add evaluate() method

In [269]:
def merge_feats(feats_a, feats_b):
    assert len(feats_a) == len(feats_b)
    merged = []
    for feat_a, feat_b in zip(feats_a, feats_b):
        if isinstance(feat_a, DenseFeat):
            continue
        if feat_a.vocabulary_size >= feat_b.vocabulary_size:
            merged.append(feat_a)
        else:
            merged.append(feat_b)
    return merged
            


In [179]:
import pandas as pd
import torch
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names

data = pd.read_csv('../data/restaurant_data_reformatted/synthetic_data.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,rating,food_rating,service_rating,smoker,drink_level,dress_preference,ambience,transport,marital_status,...,Soup_y,Southern_y,Southwestern_y,Spanish_y,Steaks_y,Sushi_y,Thai_y,Turkish_y,Vegetarian_y,Vietnamese_y
0,0,1,1,2,False,social drinker,formal,family,public,single,...,0,0,0,0,0,0,0,0,0,0
1,1,1,2,1,False,social drinker,elegant,family,public,single,...,0,0,0,0,0,0,0,0,0,0
2,2,2,1,1,False,social drinker,elegant,family,public,single,...,0,0,0,0,0,0,0,0,0,0
3,3,2,1,2,False,social drinker,elegant,family,car owner,single,...,0,0,0,0,0,0,0,0,0,0
4,4,2,1,1,False,social drinker,elegant,family,public,single,...,0,0,0,0,0,0,0,0,0,0


In [180]:
data = data.drop("Unnamed: 0", axis=1)

In [181]:
dense_feats = ["height", "weight", "birth_year"]
sparse_feats = [c for c in list(data.columns) if c not in ["rating", "food_rating", "service_rating", "weight", "height", "birth_year"]]

In [182]:
data_loader = DeepFMDataLoader(sparse_feats, dense_feats)
nn_input, dnn_feats, lin_feats, feat_names = data_loader.load(data)

In [261]:
users_df = pd.read_csv("../data/restaurant_data_reformatted/users.csv")
restaurants_df = pd.read_csv("../data/restaurant_data_reformatted/restaurants.csv")
users_df["user_id"] = range(0, len(users_df))
restaurants_df["item_id"] = range(0, len(restaurants_df))
_merged_df = _cross_join(users_df, restaurants_df)
_nn_input, _dnn_feats, _lin_feats, _feat_names = data_loader.load(_merged_df)

In [239]:
_merged_feats = merge_feats(dnn_feats, _dnn_feats)

In [240]:
train_set, test_set = train_test_split(nn_input, test_size=0.2)

In [241]:
deepfm = DeepFmModel(_merged_feats, _merged_feats, feat_names)

In [242]:
target_values = data["rating"].values
deepfm.train(train_set, target_values=target_values[:len(train_set)])

cpu
Train on 64000 samples, validate on 16000 samples, 250 steps per epoch
Epoch 1/10
147s - loss:  0.7090 - mse:  0.7090 - val_mse:  0.6836
Epoch 2/10
159s - loss:  0.6955 - mse:  0.6955 - val_mse:  0.6881
Epoch 3/10
192s - loss:  0.6947 - mse:  0.6947 - val_mse:  0.6843
Epoch 4/10
183s - loss:  0.6937 - mse:  0.6937 - val_mse:  0.6826
Epoch 5/10
176s - loss:  0.6936 - mse:  0.6936 - val_mse:  0.6835
Epoch 6/10
182s - loss:  0.6946 - mse:  0.6946 - val_mse:  0.6923
Epoch 7/10
187s - loss:  0.6930 - mse:  0.6930 - val_mse:  0.6835
Epoch 8/10
260s - loss:  0.6925 - mse:  0.6925 - val_mse:  0.6885
Epoch 9/10
123s - loss:  0.6923 - mse:  0.6923 - val_mse:  0.6874
Epoch 10/10
113s - loss:  0.6921 - mse:  0.6921 - val_mse:  0.6855


<tensorflow.python.keras.callbacks.History at 0x15a4f6510>

In [244]:
deepfm

<__main__.DeepFmModel at 0x15a97fb10>

---

In [265]:


class NNModelWrapper:
    def __init__(self, trained_nn):
        self._nn = trained_nn

    def predict_rating(self, nn_input, merged_df):
        y = self._nn.predict(nn_input)
        result = pd.DataFrame()
        result["rating"] = y.reshape((len(y),))
        result["user_id"] = merged_df["user_id"]
        result["item_id"] = merged_df["item_id"]
        output_matrix = result.pivot(index="user_id", columns="item_id", values="rating")
        return output_matrix
    

def _cross_join(df1, df2):
    df1["_join_key"] = 0
    df2["_join_key"] = 0
    merged_df = df1.merge(df2, on="_join_key")
    merged_df = merged_df.drop("_join_key", axis=1)
    return merged_df

    

In [266]:
model_wrapper = NNModelWrapper(deepfm)

In [268]:
_nn_matrix = model_wrapper.predict_rating(_nn_input, _merged_df)
_nn_matrix


item_id,0,1,2,3,4,5,6,7,8,9,...,759,760,761,762,763,764,765,766,767,768
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.262447,1.295900,1.134670,1.295900,1.288901,1.262447,1.262147,1.134670,1.134670,1.255392,...,1.555022,1.555022,1.547743,1.362345,1.561919,1.320382,1.561919,1.261866,1.010623,1.262200
1,1.563439,1.601233,1.381629,1.601233,1.594182,1.563439,1.563147,1.381629,1.381629,1.556357,...,1.768058,1.768058,1.760683,1.678776,1.774964,1.623345,1.774964,1.562796,1.284210,1.563206
2,1.667459,1.705796,1.476987,1.705796,1.698694,1.667459,1.667159,1.476987,1.476987,1.660312,...,1.817025,1.817025,1.809582,1.786862,1.823972,1.731196,1.823972,1.666806,1.379380,1.667251
3,1.081147,1.113497,0.977190,1.113497,1.106576,1.081147,1.080917,0.977190,0.977190,1.074207,...,1.356561,1.356561,1.349398,1.169966,1.363353,1.129449,1.363353,1.080637,0.856109,1.080934
4,1.232093,1.267139,1.083661,1.267139,1.260120,1.232093,1.231796,1.083661,1.083661,1.225037,...,1.474103,1.474103,1.466792,1.338106,1.481000,1.290445,1.481000,1.231482,0.970165,1.231840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,1.584908,1.619335,1.414564,1.619335,1.612184,1.584908,1.584588,1.414564,1.414564,1.577716,...,1.775464,1.775464,1.768010,1.703309,1.782475,1.651078,1.782475,1.584270,1.309126,1.584684
134,0.596208,0.612079,0.663671,0.612079,0.605336,0.596208,0.596078,0.663671,0.663671,0.589435,...,1.358840,1.358840,1.352049,0.637618,1.365499,0.628405,1.365499,0.595998,0.488516,0.596072
135,1.830642,1.869251,1.629075,1.869251,1.862118,1.830642,1.830316,1.629075,1.629075,1.823480,...,1.982554,1.982554,1.975090,1.955282,1.989535,1.896705,1.989535,1.829955,1.537348,1.830390
136,1.572747,1.609020,1.397081,1.609020,1.601927,1.572747,1.572451,1.397081,1.397081,1.565618,...,1.767860,1.767860,1.760454,1.688593,1.774806,1.634797,1.774806,1.572115,1.296517,1.572528


In [347]:
_nn_matrix.to_csv("../data/restaurant_data_reformatted/nn_matrix_out.csv")
pd.DataFrame(_heu_matrix).to_csv("../data/restaurant_data_reformatted/heu_matrix_out.csv")

In [284]:
assert _heu_matrix.shape == _nn_matrix.shape

In [294]:
resp_fn = ResponseFunction(_heu_matrix, _nn_matrix)

In [296]:
resp_fn(0.7, 0.1)

item_id,0,1,2,3,4,5,6,7,8,9,...,759,760,761,762,763,764,765,766,767,768
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.093892,0.097238,0.081115,0.097238,0.096538,0.093892,0.093862,0.081115,0.081115,0.093187,...,0.123150,0.123150,0.122422,0.103882,0.123839,0.099686,0.123839,0.093834,0.068710,0.093868
1,0.123991,0.127771,0.105810,0.127771,0.127066,0.123991,0.123962,0.105810,0.105810,0.123283,...,0.639428,0.639428,0.547861,0.135525,0.845144,0.129982,0.845144,0.123927,0.096069,0.123968
2,0.134394,0.138227,0.115346,0.138227,0.137517,0.134394,0.134363,0.115346,0.115346,0.133679,...,0.644325,0.644325,0.552751,0.146334,0.850045,0.140767,0.850045,0.134328,0.105586,0.134373
3,0.075762,0.078997,0.065367,0.078997,0.078305,0.075762,0.075739,0.065367,0.065367,0.075068,...,0.268295,0.268295,0.237302,0.084644,0.337316,0.080592,0.337316,0.075711,0.053258,0.075741
4,0.090857,0.094361,0.076014,0.094361,0.093660,0.090857,0.090827,0.076014,0.076014,0.090151,...,0.115058,0.115058,0.114327,0.101458,0.115748,0.096692,0.115748,0.090796,0.064664,0.090832
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,0.126138,0.129581,0.109104,0.129581,0.128866,0.126138,0.126106,0.109104,0.109104,0.125419,...,0.640169,0.640169,0.548594,0.137978,0.845895,0.132755,0.845895,0.126075,0.098560,0.126116
134,0.118401,0.119988,0.125147,0.119988,0.157062,0.118401,0.118388,0.125147,0.125147,0.155471,...,0.232412,0.232412,0.260698,0.213674,0.195330,0.121620,0.195330,0.118380,0.107631,0.118387
135,0.150712,0.154573,0.130555,0.154573,0.153859,0.150712,0.150679,0.130555,0.130555,0.149996,...,0.660878,0.660878,0.569302,0.163176,0.866601,0.157318,0.866601,0.150643,0.121382,0.150687
136,0.124922,0.128550,0.107356,0.128550,0.127840,0.124922,0.124893,0.107356,0.107356,0.124209,...,0.639408,0.639408,0.547838,0.136507,0.845128,0.131127,0.845128,0.124859,0.097299,0.124900


## SVD learning

In [299]:
_user_ids = users_df["userID"]
_place_ids = restaurants_df["placeID"]

In [300]:
ratings_df = pd.read_csv("../data/restaurant_data_reformatted/ratings.csv")

In [301]:
ratings_df

Unnamed: 0,userID,placeID,rating,food_rating,service_rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2
3,U1077,135060,1,2,2
4,U1068,135104,1,1,2
...,...,...,...,...,...
1156,U1043,132630,1,1,1
1157,U1011,132715,1,1,0
1158,U1068,132733,1,1,0
1159,U1068,132594,1,1,1


In [323]:
_long_table = ratings_df[ratings_df["userID"].isin(_user_ids) & ratings_df["placeID"].isin(_place_ids)]

In [324]:
_long_table = _long_table.drop(["food_rating", "service_rating"], axis=1)

In [325]:
_long_table

Unnamed: 0,userID,placeID,rating
0,U1077,135085,2
2,U1077,132825,2
3,U1077,135060,1
4,U1068,135104,1
5,U1068,132740,0
...,...,...,...
1155,U1043,132732,1
1156,U1043,132630,1
1157,U1011,132715,1
1158,U1068,132733,1


In [326]:
_user_id_mapping = {user_id_name: i for i, user_id_name in enumerate(_user_ids)}
_user_id_mapping

{'U1001': 0,
 'U1002': 1,
 'U1003': 2,
 'U1004': 3,
 'U1005': 4,
 'U1006': 5,
 'U1007': 6,
 'U1008': 7,
 'U1009': 8,
 'U1010': 9,
 'U1011': 10,
 'U1012': 11,
 'U1013': 12,
 'U1014': 13,
 'U1015': 14,
 'U1016': 15,
 'U1017': 16,
 'U1018': 17,
 'U1019': 18,
 'U1020': 19,
 'U1021': 20,
 'U1022': 21,
 'U1023': 22,
 'U1024': 23,
 'U1025': 24,
 'U1026': 25,
 'U1027': 26,
 'U1028': 27,
 'U1029': 28,
 'U1030': 29,
 'U1031': 30,
 'U1032': 31,
 'U1033': 32,
 'U1034': 33,
 'U1035': 34,
 'U1036': 35,
 'U1037': 36,
 'U1038': 37,
 'U1039': 38,
 'U1040': 39,
 'U1041': 40,
 'U1042': 41,
 'U1043': 42,
 'U1044': 43,
 'U1045': 44,
 'U1046': 45,
 'U1047': 46,
 'U1048': 47,
 'U1049': 48,
 'U1050': 49,
 'U1051': 50,
 'U1052': 51,
 'U1053': 52,
 'U1054': 53,
 'U1055': 54,
 'U1056': 55,
 'U1057': 56,
 'U1058': 57,
 'U1059': 58,
 'U1060': 59,
 'U1061': 60,
 'U1062': 61,
 'U1063': 62,
 'U1064': 63,
 'U1065': 64,
 'U1066': 65,
 'U1067': 66,
 'U1068': 67,
 'U1069': 68,
 'U1070': 69,
 'U1071': 70,
 'U1072': 71,
 '

In [327]:
_item_id_mapping = {item_id_name: i for i, item_id_name in enumerate(_place_ids)}
_item_id_mapping

{132001: 0,
 132002: 1,
 132003: 2,
 132004: 3,
 132005: 4,
 132006: 5,
 132007: 6,
 132008: 7,
 132009: 8,
 132010: 9,
 132012: 10,
 132013: 11,
 132014: 12,
 132015: 13,
 132016: 14,
 132017: 15,
 132018: 16,
 132019: 17,
 132020: 18,
 132021: 19,
 132022: 20,
 132023: 21,
 132024: 22,
 132025: 23,
 132026: 24,
 132028: 25,
 132030: 26,
 132031: 27,
 132083: 28,
 132087: 29,
 132092: 30,
 132094: 31,
 132096: 32,
 132097: 33,
 132098: 34,
 132100: 35,
 132101: 36,
 132102: 37,
 132103: 38,
 132105: 39,
 132106: 40,
 132107: 41,
 132108: 42,
 132109: 43,
 132114: 44,
 132115: 45,
 132116: 46,
 132118: 47,
 132119: 48,
 132120: 49,
 132121: 50,
 132125: 51,
 132126: 52,
 132127: 53,
 132128: 54,
 132130: 55,
 132131: 56,
 132132: 57,
 132133: 58,
 132136: 59,
 132137: 60,
 132138: 61,
 132145: 62,
 132146: 63,
 132147: 64,
 132155: 65,
 132156: 66,
 132157: 67,
 132159: 68,
 132160: 69,
 132161: 70,
 132162: 71,
 132163: 72,
 132164: 73,
 132165: 74,
 132166: 75,
 132167: 76,
 132171: 

In [328]:
_long_table["user_id"] = _long_table["userID"].map(_user_id_mapping)
_long_table["item_id"] = _long_table["placeID"].map(_item_id_mapping)

In [329]:
_long_table

Unnamed: 0,userID,placeID,rating,user_id,item_id
0,U1077,135085,2,76,747
2,U1077,132825,2,76,526
3,U1077,135060,1,76,738
4,U1068,135104,1,67,763
5,U1068,132740,0,67,460
...,...,...,...,...,...
1155,U1043,132732,1,42,452
1156,U1043,132630,1,42,371
1157,U1011,132715,1,10,445
1158,U1068,132733,1,67,453


In [348]:
_long_table.to_csv("../data/restaurant_data_reformatted/ratings_out.csv")

In [334]:
!pip install surprise



In [337]:
import numpy as np
from surprise import Dataset, Reader, SVD, accuracy, KNNBasic

ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject

In [None]:
dataset = Dataset.load_from_df(df[['user_id', 'item_id', 'rating']], Reader(rating_scale=(0, 5)))
algo = SVD()
return cross_validate(algo, dataset, measures=['RMSE', 'MAE'], cv=cv, verbose=True)
  

-0.15284972228220967