In [22]:
import numpy as np
import pandas as pd
import gc
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight, BM25Recommender
from implicit.als import AlternatingLeastSquares
from implicit.cpu.bpr import BayesianPersonalizedRanking
from implicit.recommender_base import RecommenderBase
from implicit import evaluation
from utils import *
import json
import statistics

### Set model weights

In [2]:
BPR_WEIGHT = 0.5
ALS_WEIGHT = 0.8
BM25_WEIGHT = 1
K = 10

### Load dataset

In [3]:
amazon_beauty_df = pd.read_csv("ratings_Beauty.csv")
user_map, item_map, amazon_beauty_csr = pandas_df_to_csr(amazon_beauty_df)

In [4]:
amazon_beauty_coo_bm25 = bm25_weight(amazon_beauty_csr, K1=100, B=0.8)

In [5]:
# Test-Train Split
train_csr, test_csr = evaluation.train_test_split(amazon_beauty_coo_bm25, train_percentage=0.8, random_state=55)
print(f"Train size: {train_csr.size} \n Test size: {test_csr.size}")

Train size: 1618938 
 Test size: 404132


### Load models

In [6]:
ALS_model = AlternatingLeastSquares(factors=128, regularization=0.1, alpha=3.0)
ALS_model.fit(train_csr)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [34:06<00:00, 136.43s/it]


In [7]:
# ALS_model = AlternatingLeastSquares.load("4_CF_ALS_implicit")
BPR_model = BayesianPersonalizedRanking.load("5_CF_BPR_implicit")
BM25_model = BM25Recommender.load("6_BM25")

### Inference

In [11]:
# Dictionary to store intermediate recommendations and ensemble results
results = {}
results["BPR"] = {"weight": BPR_WEIGHT}
results["ALS"] = {"weight": ALS_WEIGHT}
results["BM25"] = {"weight": BM25_WEIGHT}
results["ENSEMBLE"] = {"weight": 1}
results

{'BPR': {'weight': 0.5},
 'ALS': {'weight': 0.8},
 'BM25': {'weight': 1},
 'ENSEMBLE': {'weight': 1}}

In [9]:
test_coo = test_csr.tocoo()

In [12]:
actual_dict = {}
for user_id, product_id, rating in zip(test_coo.row, test_coo.col, test_coo.data):
    # print(f"Processing: user_id: {user_id}, product_id: {product_id}, rating: {rating}")
    print(user_id, end = ', ')
    # Retrieve actual products and ratings
    if user_id in actual_dict:
        actual_dict[user_id].append(product_id)
    else:
        actual_dict[user_id] = [product_id]


1, 6, 11, 12, 13, 16, 19, 24, 32, 35, 38, 39, 42, 47, 49, 53, 58, 59, 59, 67, 71, 73, 73, 75, 80, 90, 97, 109, 110, 116, 118, 118, 120, 121, 126, 128, 131, 142, 154, 163, 166, 168, 171, 172, 176, 183, 186, 187, 189, 189, 189, 195, 198, 201, 202, 205, 209, 211, 218, 220, 222, 226, 226, 227, 231, 239, 240, 242, 246, 247, 249, 251, 256, 258, 259, 264, 266, 274, 289, 297, 298, 304, 312, 316, 318, 320, 321, 328, 337, 338, 338, 338, 340, 346, 350, 351, 351, 351, 354, 357, 357, 361, 363, 365, 370, 372, 374, 374, 377, 380, 382, 392, 397, 398, 413, 416, 417, 417, 418, 423, 424, 425, 426, 426, 426, 426, 438, 446, 453, 458, 464, 470, 472, 475, 477, 487, 499, 500, 500, 501, 514, 517, 522, 530, 530, 530, 530, 530, 539, 543, 554, 557, 566, 569, 570, 573, 577, 579, 588, 589, 591, 597, 598, 602, 603, 604, 607, 608, 610, 611, 620, 624, 624, 625, 626, 629, 633, 635, 636, 647, 647, 648, 653, 655, 655, 656, 657, 658, 660, 664, 664, 673, 673, 673, 673, 673, 674, 679, 686, 688, 694, 696, 696, 697, 711, 717,

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [65]:
import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["test"]
mycol = mydb["products"]

In [70]:
image_id_dict = {}
counter = 0
for x in mycol.find():
    image_id = x['_id']
    # print(image_id)
    image_id_dict[image_id] = None
    counter += 1
    if counter == len(item_map):
        break
assert counter == len(image_id_dict.keys())

In [71]:
prod_img_map = {}
counter = 0
for x in mycol.find():
    image_id = x['_id']
    # print(image_id)
    prod_img_map[counter] = image_id
    counter += 1
    if counter == len(item_map):
        break

In [62]:
# myclient.close()

In [79]:
def get_rec(user_id: int):
    # Get recommendation from each model
    ids, scores = BPR_model.recommend(user_id, test_csr[user_id], N=K, filter_already_liked_items=False)
    results["BPR"][user_id] = {"product_ids": list(ids), "scores": list(scores)}
    
    ids, scores = ALS_model.recommend(user_id, test_csr[user_id], N=K, filter_already_liked_items=False)
    results["ALS"][user_id] = {"product_ids": list(ids), "scores": list(scores)}
    
    ids, scores = BM25_model.recommend(user_id, test_csr[user_id], N=K, filter_already_liked_items=False)
    results["BM25"][user_id] = {"product_ids": list(ids), "scores": list(scores)}
    
    # Ensemble results
    ensemble_results = {}
    for _model, _results in results.items():
        if _model == "ENSEMBLE":
            continue
        scores = _results[user_id]['scores']
        # Check if all product scores are equal or empty
        if len(set(scores)) <= 1:
            # print(f"Skipping Model: {_model}, User Id: {user_id}")
            continue
        # Score is normalized to range 0-1 and then weighted by the specified model weight 
        normalized_scores = normalize(arr=scores, t_max=_results['weight'])
        # print("Normalized scores: ", normalized_scores)
        for id, score in zip(_results[user_id]['product_ids'], normalized_scores):
            # Case where product is already recommended by one or more other models
            if id in ensemble_results:
                ensemble_results[id] += score # Add the score to the previous value
                ensemble_results[id] /= 2 # Average the score (This is a rough average)
            # Case where product is recommended first time by current model
            else:
                ensemble_results[id] = score
        
    ensemble_results = sort_by_score(ensemble_results)
    results["ENSEMBLE"][user_id] = {"product_ids": list(ensemble_results.keys()), "scores": list(ensemble_results.values())}
    return results["ENSEMBLE"][user_id]["product_ids"][:K]

In [92]:
get_rec(user_id=821601)

[96976, 200498, 4542, 2693, 198411, 174709, 21261, 4524, 4545, 120616]

### Demo

In [75]:
import io
import gradio as gr
from skimage.io import imread, imshow

In [56]:
def get_product_image(product_id: int):
    query = { "_id": product_id }
    for x in mycol.find(query):
        for e, pic in enumerate(x['imgs']):
            picture = imread(io.BytesIO(pic['picture']))
            # do something with the picture, etc
            # imshow(picture)
    return picture

In [84]:
def grad_wrapper(user_id):
    results = get_rec(user_id=int(user_id))
    img_list = []
    for prod_id in results:
        img_list.append(
            get_product_image(
                product_id=prod_img_map[prod_id]
            )
        )
    return img_list

In [86]:
demo = gr.Interface(fn=grad_wrapper, inputs="number", outputs=["image", "image", "image", "image", "image", "image", "image", "image", "image", "image"])

demo.launch()

Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.


