In [1]:
from collections import defaultdict
from surprise import Dataset
import pandas as pd
from surprise import SVD
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
import numpy as np
from surprise import dump
import os
from surprise.model_selection import KFold
import io  # needed because of weird encoding of u.item file

from surprise import KNNBaseline
from surprise import get_dataset_dir



In [2]:

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    '''Compute precision@k and recall@k separately for every user.

    Args:
        predictions: Iterable of 5-element records unpacked as
            (user id, item id, true rating, estimated rating, details).
        k(int): How many of the highest-estimated items per user are
            inspected. Default is 10.
        threshold(float): A rating (true or estimated) at or above this value
            counts as relevant / recommended. Default is 3.5.

    Returns:
        A (precisions, recalls) pair of dicts keyed by user id. When a user has
        no recommended item among the top k, precision is set to 1; when a
        user has no relevant item at all, recall is set to 1.
    '''

    # Bucket (estimated, true) rating pairs by user.
    pairs_by_user = defaultdict(list)
    for prediction in predictions:
        user, _, actual, estimate, _ = prediction
        pairs_by_user[user].append((estimate, actual))

    precisions, recalls = {}, {}
    for user, pairs in pairs_by_user.items():
        # Highest estimates first; sorted() is stable, so ties keep input order.
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        head = ranked[:k]

        # Relevant items are counted over ALL of the user's ratings ...
        relevant_total = sum(actual >= threshold for _, actual in ranked)
        # ... whereas recommended items and hits are counted in the top k only.
        recommended = sum(estimate >= threshold for estimate, _ in head)
        hits = sum((actual >= threshold) and (estimate >= threshold)
                   for estimate, actual in head)

        # Precision@k: share of recommended items that are relevant.
        if recommended != 0:
            precisions[user] = hits / recommended
        else:
            precisions[user] = 1

        # Recall@k: share of relevant items that were recommended.
        if relevant_total != 0:
            recalls[user] = hits / relevant_total
        else:
            recalls[user] = 1

    return precisions, recalls

def get_top_n(predictions, n=10):
    '''Group predictions by user and keep each user's n best-estimated items.

    Args:
        predictions(list of Prediction objects): Records unpacked as
            (user id, item id, true rating, estimated rating, details), as
            returned by the test method of an algorithm.
        n(int): Maximum number of recommendations kept per user. Default
            is 10.

    Returns:
    A defaultdict(list) where keys are user (raw) ids and values are lists of
        tuples [(raw item id, rating estimation), ...] holding at most n
        entries, ordered by decreasing estimation.
    '''

    # Collect every (item, estimate) pair under its user.
    grouped = defaultdict(list)
    for prediction in predictions:
        user, item, _, estimate, _ = prediction
        grouped[user].append((item, estimate))

    # Rank each user's pairs by estimate (stable, descending) and truncate.
    # Only values of existing keys are replaced, so iterating the keys is safe.
    for user in grouped:
        ranked = sorted(grouped[user], key=lambda pair: pair[1], reverse=True)
        grouped[user] = ranked[:n]

    return grouped

def train_and_save_prediction_model(file_to_save, input_csv='testForInput.csv'):
    '''Fit an SVD model on the full ratings file and dump it to disk.

    Args:
        file_to_save(str): Destination path of the dump file; a leading ``~``
            is expanded.
        input_csv(str): Path of the ratings CSV. It must contain the columns
            ``userID``, ``itemID`` and ``rating``. Defaults to the file the
            notebook has always used, so existing calls are unaffected.

    Returns:
        None. The fitted algorithm is written to ``file_to_save``.
    '''
    ratings = pd.read_csv(os.path.expanduser(input_csv))

    # A Reader is still needed, but only its rating_scale parameter matters
    # when loading from a DataFrame.
    reader = Reader(rating_scale=(1, 5))

    # The columns must be passed as user id, item id, rating (in that order).
    data = Dataset.load_from_df(ratings[['userID', 'itemID', 'rating']], reader)

    # Train on every available rating (no hold-out split). To estimate accuracy
    # first, run: cross_validate(SVD(), data, measures=['RMSE', 'MAE'], cv=5)
    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)

    # Persist only the fitted algorithm (no predictions are stored).
    file_name = os.path.expanduser(file_to_save)
    dump.dump(file_name, algo=algo)

def train_and_save_similarity_model(file_to_save, input_csv='testForInput.csv'):
    '''Fit a user-based cosine KNNBaseline model and dump it to disk.

    Args:
        file_to_save(str): Destination path of the dump file; a leading ``~``
            is expanded.
        input_csv(str): Path of the ratings CSV. It must contain the columns
            ``userID``, ``itemID`` and ``rating``. Defaults to the file the
            notebook has always used, so existing calls are unaffected.

    Returns:
        None. The fitted algorithm is written to ``file_to_save``.
    '''
    ratings = pd.read_csv(os.path.expanduser(input_csv))

    # A Reader is still needed, but only its rating_scale parameter matters
    # when loading from a DataFrame.
    reader = Reader(rating_scale=(1, 5))

    # The columns must be passed as user id, item id, rating (in that order).
    data = Dataset.load_from_df(ratings[['userID', 'itemID', 'rating']], reader)
    trainset = data.build_full_trainset()

    # user_based=True makes the similarity matrix (and therefore
    # get_neighbors) operate on inner USER ids, not item ids.
    sim_options = {'name': 'cosine', 'user_based': True}
    algo = KNNBaseline(sim_options=sim_options)
    algo.fit(trainset)

    # Persist only the fitted algorithm (no predictions are stored).
    file_name = os.path.expanduser(file_to_save)
    dump.dump(file_name, algo=algo)
    
    
    
    
def load_model_from_disk(file_to_load):
    '''Load a previously dumped Surprise algorithm.

    Args:
        file_to_load(str): Path of the dump file (e.g. 'SVD_dump_file'); a
            leading ``~`` is expanded.

    Returns:
        The algorithm object stored in the dump. The predictions slot of the
        dump is discarded.

    Note:
        surprise.dump is a wrapper around pickle, so only load files that come
        from a trusted source.
    '''
    _stored_predictions, algo = dump.load(os.path.expanduser(file_to_load))
    return algo

In [3]:
# Train the SVD model and write it to 'SVD_dump_file' (relative to the working directory).
train_and_save_prediction_model ('SVD_dump_file')

In [4]:
# Train the KNNBaseline model and write it to 'KNNbaseline_dump_file'.
train_and_save_similarity_model('KNNbaseline_dump_file')

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [3]:
# Reload the dumped SVD model; the /topN route reads this global.
prediction_algo = load_model_from_disk('SVD_dump_file')

In [4]:
# Reload the dumped KNNBaseline model; the /knn route and later cells read this global.
knn_algo = load_model_from_disk('KNNbaseline_dump_file')

In [None]:
# Flask application that serves the loaded models over HTTP.
# NOTE(review): Response is imported but not used in the visible cells.
from flask import Flask,Response,make_response
app = Flask(__name__)
# Ratings table, read once when this cell runs and referenced by the route handlers below.
df = pd.read_csv('testForInput.csv')
# Example userID for manual testing of the routes: 'ALOXOO497B4LH'
@app.route('/topN/<user_name>') # /review/
def get_TopN_items_prediction(user_name):
    '''Return up to 10 of the user's items ranked by predicted rating.

    Predictions are computed with ``prediction_algo`` for the rows of ``df``
    that belong to ``user_name``, i.e. the ranking covers items this user
    already has a rating for in the CSV, not unseen items.

    Args:
        user_name(str): Raw user id taken from the URL.

    Returns:
        A ``(response, status)`` pair. On success the JSON body is
        ``{"username": ..., "items": [...]}`` with status 200. When the user
        has no rows in ``df`` the body is an error object with status 404.
        Both responses carry a permissive CORS header.
    '''
    # Select the needed columns by name, in the (user, item, rating) order that
    # Surprise expects, instead of dropping a column by position.
    rating_columns = ["userID", "itemID", "rating"]
    user_rows = df.loc[df['userID'] == user_name, rating_columns]

    # An unknown user previously produced an empty result and an IndexError
    # (HTTP 500) on ret[0]; answer with an explicit 404 instead.
    if user_rows.empty:
        resp = make_response({"error": "user not found", "username": user_name})
        resp.headers['Access-Control-Allow-Origin'] = '*'
        return resp, 404

    # Surprise's test() takes an iterable of (uid, iid, r_ui) tuples.
    testset = list(user_rows.itertuples(index=False, name=None))
    predictions = prediction_algo.test(testset)

    # Every row belongs to the same user, so top_n holds exactly one entry.
    top_n = get_top_n(predictions, n=10)
    uid, user_ratings = next(iter(top_n.items()))

    res_data = {
        "username": uid,
        "items": [iid for (iid, _) in user_ratings]
        }

    resp = make_response(res_data)
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp,200

@app.route('/knn/<user_name>')
def get_k_nearest_neighbour_by_itme (user_name):
    '''Return the items rated by the 10 nearest neighbours of a user.

    Assumes ``knn_algo`` was fitted with ``user_based=True`` (as
    train_and_save_similarity_model does), so ``get_neighbors`` takes and
    returns inner USER ids of the model's trainset.

    Args:
        user_name(str): Raw user id taken from the URL.

    Returns:
        A ``(response, status)`` pair. On success the JSON body is
        ``{"items": [...]}`` (raw item ids, de-duplicated, ordered by
        neighbour) with status 200. When the user is not part of the trainset
        the body is an error object with status 404. Both responses carry a
        permissive CORS header.
    '''
    trainset = knn_algo.trainset

    # Translate the raw id with the trainset's own mapping rather than guessing
    # the inner id from the row order of the CSV. to_inner_uid raises
    # ValueError for users that were not in the training data.
    try:
        inner_uid = trainset.to_inner_uid(user_name)
    except ValueError:
        resp = make_response({"error": "user not found", "username": user_name})
        resp.headers['Access-Control-Allow-Origin'] = '*'
        return resp, 404

    neighbour_inner_uids = knn_algo.get_neighbors(inner_uid, k=10)

    # The neighbours are inner user ids, NOT row positions of df: indexing
    # df.iloc with them returned unrelated itemIDs. Collect instead the items
    # each neighbour actually rated (trainset.ur maps inner user id to a list
    # of (inner item id, rating)) and convert them back to raw item ids.
    items = []
    already_listed = set()
    for neighbour in neighbour_inner_uids:
        for inner_iid, _rating in trainset.ur[neighbour]:
            raw_iid = trainset.to_raw_iid(inner_iid)
            if raw_iid not in already_listed:
                already_listed.add(raw_iid)
                items.append(raw_iid)

    res_data ={
        "items":items
    }
    resp = make_response(res_data)
    resp.headers['Access-Control-Allow-Origin'] = '*'
    return resp,200

#     return ','.join(str(e) for e in toy_k_neighbors)
    
    


# Start Flask's built-in development server when this cell/script runs as __main__.
# app.run() blocks until the server is stopped, so the kernel stays busy while it serves.
if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [18/Jun/2020 17:25:54] "GET / HTTP/1.1" 404 -
127.0.0.1 - - [18/Jun/2020 17:26:03] "GET /knn/ALOXOO497B4LH HTTP/1.1" 200 -


In [9]:
# Scratch cell: reload the ratings CSV and keep the rows of one example user,
# dropping the column at position 3.
df = pd.read_csv('testForInput.csv')
des_user_name = df[df['userID'] == 'ALOXOO497B4LH'].drop(df.columns[3], axis=1)


# Index label of the first matching row (this is the cell's displayed output).
des_user_name.index[0]

3879

In [3]:
# Scratch cell: rebuild the full trainset at notebook level for inspection.
df = pd.read_csv('testForInput.csv')
# A reader is still needed, but only the rating_scale param is required.
reader = Reader(rating_scale=(1, 5))

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
trainset = data.build_full_trainset()

In [6]:
# Rows of the example user with the column at position 3 removed; displayed as the cell output.
des_user_name = df[df['userID'] == 'ALOXOO497B4LH'].drop(df.columns[3], axis=1)
des_user_name

Unnamed: 0,itemID,rating,userID
3879,B00RUZPKRQ,5,ALOXOO497B4LH
4278,B0186D1TYS,5,ALOXOO497B4LH
4972,B01FWKKVTM,5,ALOXOO497B4LH


In [26]:
# NOTE(review): all_ratings() returns a generator, so this prints the generator
# object's repr rather than the ratings; wrap it in list(...) to see the values.
print ( trainset.all_ratings() )

<generator object Trainset.all_ratings at 0x0000017D4D9AD948>


In [30]:
# Print the position of the example user among the unique userID values
# (pandas unique() keeps order of first appearance). There is no break, so the
# loop runs to the end and i finishes equal to the number of unique users.
# NOTE(review): presumably meant to mirror Surprise's inner user id - confirm;
# trainset.to_inner_uid() is the documented lookup.
i = 0 
for item in df['userID'].unique():
    if item == 'ALOXOO497B4LH':
        print (i)
    i += 1

3615


In [32]:
# Number of distinct users in the ratings CSV.
len ( df['userID'].unique() )

4593

In [6]:
# Scratch cell: find the example user's position among the unique userID
# values, stopping at the first match so i keeps that position.
df = pd.read_csv('testForInput.csv')
i = 0 
for item in df['userID'].unique():
    if item == 'ALOXOO497B4LH':
        break
    i += 1

# Ask the KNN model for the 10 nearest neighbours of inner id i. The returned
# values are inner ids of the model's trainset, not DataFrame row positions.
# NOTE(review): assumes i equals the Surprise inner user id - confirm with
# knn_algo.trainset.to_inner_uid('ALOXOO497B4LH').
toy_k_neighbors = knn_algo.get_neighbors(i, k=10)
toy_k_neighbors

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [7]:
# Display the current value of the global i left by the search loop.
i

3615