<a href="https://colab.research.google.com/github/vincm1/RecSys_Implicit/blob/master/Alternating_Least_Squares_(ALS).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Alternating Least Squares

This notebook uses the ALS (Alternating Least Squares) matrix factorization algorithm to recommend and rank the top-10 items per customer, based on the paper by Koren, Bell and Volinsky (2009): https://datajobs.com/data-science-repo/Recommender-Systems-[Netflix].pdf

Library used:

*   Implicit ALS (https://benfred.github.io/implicit/, by Ben Frederickson)

In [2]:
#!pip install --upgrade implicit

In [3]:
import zipfile
import time
import pickle
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

import scipy.sparse as sparse

from datetime import datetime, timedelta

# Loading DataFrames



In [4]:
# Read the order export straight out of the archive. The context managers close
# both the zip archive and the member stream as soon as the CSV has been parsed.
with zipfile.ZipFile('/content/drive/MyDrive/RecSys/Orders_Nov22_Jun23.zip') as df_zip:
    with df_zip.open('Bericht 1.csv') as orders_csv:
        df = pd.read_csv(orders_csv, delimiter=";")

  df = pd.read_csv(df_zip.open('Bericht 1.csv'), delimiter=";")


In [5]:
df.head(2)

Unnamed: 0,Order Nbr,Entry Date,Entry DateTime,FiscalMonth,BranchCustomerNbr,CustomerName,BusinessUnitLevel2,KDGroup,Sku,Product Descr1,Product Descr2,ProductGroup,ProductGroupMasterDescription,ProductGroupDescription,ProductGroup2ndDescription,Sales,Qty Shipped
0,1547606,01.11.22,,2022FM11,15515778,NET-S M. CHMIELEWSKI,Export Channel (DE),,9433B9X,INK CARTRIDGE SPS,BLACK 370ML 600 DPI INKJET BULK,1037,Consumables,Ink,Supplies,-1533,-1.0
1,1547615,01.11.22,,2022FM11,15509465,DIGITAL RIVER IRELAND LIMITED,Export Channel (DE),DIRL,CB31510,LENOVO KEYBOARD PACK,FOR TAB P11-DE,641,Input Devices,Keyboards & Keypads,Printers & Peripherals,-10461,-1.0


In [6]:
print(len(df), len(df["BranchCustomerNbr"].unique()), len(df["Sku"].unique()))

2220299 17697 77401


# Data Preprocessing

In [7]:
# Keep customer and SKU identifiers as strings so they are treated as labels.
df["BranchCustomerNbr"] = df["BranchCustomerNbr"].astype(str)
df["Sku"] = df["Sku"].astype(str)
# Parse the entry date (day.month.two-digit-year, e.g. 01.11.22).
df['Entry Date'] = pd.to_datetime(df['Entry Date'], format='%d.%m.%y')
# Drop returns (negative Qty Shipped) and zero-quantity orders.
df = df[df["Qty Shipped"] > 0]
# Drop backlog invoices: keep only rows entered ON OR AFTER the cut-off date.
specific_date = pd.to_datetime('2022-11-01')
# .copy() makes the filtered result an independent frame, so the column added
# below is written to it directly instead of to a slice of the unfiltered data.
df = df[df["Entry Date"] >= specific_date].copy()
# Every remaining order line counts as one purchase event.
df["purchase"] = 1

In [8]:
print(len(df), len(df["BranchCustomerNbr"].unique()), len(df["Sku"].unique()))

2028956 13894 75643


In [9]:
def sku_count(df_pl, user_col='BranchCustomerNbr', item_col='Sku'):
    """Add a ``sku_count`` column holding the number of distinct items per customer.

    The column is written onto ``df_pl`` itself (the input frame is modified in
    place) and that same object is returned, so both ``df = sku_count(df)`` and
    a bare ``sku_count(df)`` call work.

    @param df_pl: dataframe with one row per order line
    @param user_col: column identifying the customer (default 'BranchCustomerNbr')
    @param item_col: column identifying the item (default 'Sku')

    @return df_pl: the input dataframe, now carrying a ``sku_count`` column
    """
    # transform() broadcasts the per-customer distinct count back onto every row
    df_pl['sku_count'] = df_pl.groupby(user_col)[item_col].transform('nunique')

    return df_pl

In [10]:
df = sku_count(df)

In [11]:
# drop customers that only purchased 1 SKU
df = df[df["sku_count"] > 1]

In [12]:
print(len(df), len(df["BranchCustomerNbr"].unique()), len(df["Sku"].unique()))

2025544 11328 75495


In [13]:
# Create numeric customer (bcn_id) and SKU (sku_id) columns from category codes.
# assign() returns a new frame, so nothing is written into the filtered slice
# left behind by the sku_count > 1 filter.
df = df.assign(
    BranchCustomerNbr=df['BranchCustomerNbr'].astype("category"),
    Sku=df['Sku'].astype("category"),
)
df = df.assign(
    bcn_id=df['BranchCustomerNbr'].cat.codes,
    sku_id=df['Sku'].cat.codes,
)

In [14]:
df.head(2)

Unnamed: 0,Order Nbr,Entry Date,Entry DateTime,FiscalMonth,BranchCustomerNbr,CustomerName,BusinessUnitLevel2,KDGroup,Sku,Product Descr1,...,ProductGroup,ProductGroupMasterDescription,ProductGroupDescription,ProductGroup2ndDescription,Sales,Qty Shipped,purchase,sku_count,bcn_id,sku_id
213,1545306,2022-11-02,,2022FM11,15885514,AXIS SOLUTION (PRIVATE) LIMITED,Export Channel (DE),,CF55877,DT PRINT ZQ320 KIT LABEL SENSOR,...,5805,AIDC/PoS Printers,Mobile Receipt Printer,"Other (incl. AIDC/POS, V7)","10.713,30",41.0,1,20,417,49730
458,4422886,2022-11-03,,2023FM02,44413224,BWG INFORMATIONSYSTEME GMBH,Business Channel,,CF89211,Z-SELECT 2000D REMOVABLE NS,...,5812,AIDC/PoS Printers,Label Printers Supplies,"Other (incl. AIDC/POS, V7)","1.393,00",140.0,1,111,803,52649


In [15]:
# Distinct encoded customers and SKUs left after preprocessing.
users = df["bcn_id"].unique()
skus = df["sku_id"].unique()
print(users.size, skus.size)

11328 75495


In [16]:
df_2 = df[["bcn_id", "sku_id", "Entry Date", "Qty Shipped", "purchase"]]

In [20]:
# One row per (customer, SKU) pair: most recent order date, total quantity
# shipped and number of order lines.
grouped_df = (
    df_2
    .groupby(["bcn_id", "sku_id"], as_index=False)
    .agg({"Entry Date": "max", "Qty Shipped": "sum", "purchase": "sum"})
)

In [18]:
# Per (customer, SKU) totals over the whole period: quantity shipped and
# number of order lines.
grouped_sum = (
    df_2
    .groupby(["bcn_id", "sku_id"])[["Qty Shipped", "purchase"]]
    .sum()
    .reset_index()
)

In [21]:
# create binary column
grouped_df["purch_bin"] = 1

In [22]:
grouped_df.head(2)

Unnamed: 0,bcn_id,sku_id,Entry Date,Qty Shipped,purchase,purch_bin
0,0,925,2023-02-03,1.0,1,1
1,0,3417,2023-03-31,1.0,1,1


## Train test split

In [23]:
def train_test_split(df, holdout_num):
    """Split interactions into train/test by holding out each customer's latest rows.

    Rows are sorted by ``bcn_id`` (ascending) and ``Entry Date`` (descending); the
    first ``holdout_num`` rows of every ``bcn_id`` form the test set and all
    remaining (bcn_id, sku_id) pairs form the training set. The input frame is
    not modified.

    NOTE: a later cell imports ``train_test_split`` from ``implicit.evaluation``,
    which rebinds this name; call this function before that import runs.

    @param df: dataframe with at least ``bcn_id``, ``sku_id`` and ``Entry Date`` columns
    @param holdout_num: number of items to be held out per user as testing items

    @return df_train: training data, re-indexed 0..n-1
    @return df_test: testing data; the pre-split row label is kept in an ``index`` column

    @raise AssertionError: if the two parts do not add up to the input length
        (e.g. when a held-out (bcn_id, sku_id) pair occurs more than once in ``df``)

    """
    # newest interactions first within every customer
    df = df.sort_values(['bcn_id', 'Entry Date'], ascending=[True, False])

    # test set: the most recent `holdout_num` rows of each customer
    df_test = df.groupby('bcn_id').head(holdout_num).reset_index()

    # train set: anti-join on the held-out pairs. Rows that find a match get
    # remove == 1 and are dropped; unmatched rows carry NaN and are kept.
    held_out = df_test[['bcn_id', 'sku_id']].assign(remove=1)
    df_train = (
        df.merge(held_out, on=['bcn_id', 'sku_id'], how='left')
        .query('remove != 1')
        # keyword form: the positional axis argument was removed in pandas 2.0
        .drop(columns='remove')
        .reset_index(drop=True)
    )

    # Sanity check to make sure we're not duplicating/losing data
    assert len(df) == len(df_train) + len(df_test)

    return df_train, df_test

In [24]:
df_train, df_test = train_test_split(grouped_df[["bcn_id","sku_id", "Entry Date", "purchase"]], holdout_num=1)

  ).query('remove != 1').drop('remove', 1).reset_index(drop=True)


In [25]:
# Keep only held-out rows whose customer also appears in the training set, so
# every evaluated customer has training interactions behind it.
df_test_filtered = df_test[df_test['bcn_id'].isin(set(df_train['bcn_id'].unique()))]
#df_test_filtered = df_test_filtered[df_test_filtered['sku_id'].isin(set(df_train['sku_id'].unique()))]
# Sanity check on the filtered frame, i.e. the one the evaluation actually uses.
set(df_test_filtered.bcn_id.unique()).issubset(set(df_train.bcn_id.unique()))

True

## Negative Sampling

In [26]:
def negative_sampling(bcn_ids, sku_ids, items, n_neg):
    """Draw ``n_neg`` random negative (not purchased) SKUs for every positive pair.

    Negatives are drawn uniformly from ``items`` with ``np.random.choice`` and
    redrawn while the (customer, SKU) pair is a known positive. The same
    negative may be drawn more than once for a customer.

    @param bcn_ids: customer ids of the positive interactions
    @param sku_ids: SKU ids of the positive interactions, aligned with ``bcn_ids``
    @param items: unique list of SKU ids to sample negatives from
    @param n_neg: number of negative labels to sample per positive pair

    @return df_neg: negative sample dataframe with columns
        ``bcn_id``, ``sku_id`` and ``purch_bin`` (always 0)

    @raise ValueError: if ``n_neg > 0`` and some customer already has a positive
        for every SKU in ``items`` (the redraw loop could never terminate)

    """
    records = set(zip(bcn_ids, sku_ids))

    if n_neg > 0:
        # Group positives per customer so customers with no valid negative left
        # are detected up front instead of spinning forever in the redraw loop.
        purchased = {}
        for u, i in records:
            purchased.setdefault(u, set()).add(i)
        catalogue = set(np.asarray(items).tolist())
        exhausted = [u for u, seen in purchased.items() if catalogue <= seen]
        if exhausted:
            raise ValueError(
                "cannot sample negatives: %d customer(s) already have every "
                "candidate item (e.g. customer %r)" % (len(exhausted), exhausted[0])
            )

    neg = []
    # for every positive label case
    for (u, _) in records:
        # generate n_neg negative labels
        for _ in range(n_neg):
            j = np.random.choice(items)
            # resample if the SKU is already a positive for that customer
            while (u, j) in records:
                j = np.random.choice(items)
            neg.append([u, j, 0])

    # convert to pandas dataframe for concatenation later
    df_neg = pd.DataFrame(neg, columns=['bcn_id', 'sku_id', 'purch_bin'])

    return df_neg

In [27]:
# NOTE: this rebinds grouped_df. The frame built earlier in the notebook (with
# "Entry Date" and "purch_bin") is replaced by plain per-pair totals.
grouped_df = (
    df_2
    .groupby(["bcn_id", "sku_id"], as_index=False)
    .agg({"Qty Shipped": "sum", "purchase": "sum"})
)

In [34]:
df_test_filtered

Unnamed: 0,index,bcn_id,sku_id,Entry Date,purchase
0,27,0,14888,2023-06-28,7
1,121,1,34964,2023-06-20,1
2,157,2,28331,2023-06-26,1
3,164,3,25932,2023-06-26,16
4,184,4,69002,2023-03-06,1
...,...,...,...,...,...
11323,541839,11323,67869,2023-06-15,1
11324,541841,11324,2702,2023-01-26,2
11325,541843,11325,38849,2023-03-22,1
11326,541847,11326,42224,2023-02-24,1


## ALS model: baseline fit

In [28]:
import implicit
from implicit.gpu.als import AlternatingLeastSquares as ALS
from implicit.evaluation import leave_k_out_split, precision_at_k, mean_average_precision_at_k, ndcg_at_k, AUC_at_k, train_test_split

In [35]:
# create sparse user item csr matrices
# Both matrices get the same explicit (n_customers, n_skus) shape. Without it
# scipy sizes each matrix from the largest index it happens to contain, so the
# train and test matrices could end up with different numbers of columns.
interaction_shape = (int(df['bcn_id'].max()) + 1, int(df['sku_id'].max()) + 1)
csr_train = sparse.csr_matrix(
    (df_train['purchase'], (df_train['bcn_id'], df_train['sku_id'])),
    shape=interaction_shape,
)
csr_test = sparse.csr_matrix(
    (df_test_filtered['purchase'], (df_test_filtered['bcn_id'], df_test_filtered['sku_id'])),
    shape=interaction_shape,
)

In [30]:
# Full interaction matrix (train + held-out purchases). The explicit shape keeps
# its width tied to the full SKU id range rather than to the largest index present.
csr_overall = sparse.csr_matrix(
    (grouped_sum['purchase'], (grouped_sum['bcn_id'], grouped_sum['sku_id'])),
    shape=(int(df['bcn_id'].max()) + 1, int(df['sku_id'].max()) + 1),
)

In [36]:
model = ALS()

In [39]:
model.fit(csr_train)

  0%|          | 0/15 [00:00<?, ?it/s]

In [40]:
auc10 = AUC_at_k(model, csr_train, csr_test, K=10, num_threads=10)
auc10

  0%|          | 0/11328 [00:00<?, ?it/s]

0.5762536653294095

## Hyperparameter tuning


In [41]:
from IPython.utils.sysinfo import num_cpus
from sklearn.model_selection import ParameterGrid

# Search space: every combination of these values is trained and scored once.
param_grid = {
    'factors': [50, 100, 150, 200],
    'iterations':[5, 10, 15, 20],
    'alpha': [5, 10, 20, 40],
    'regularization': [0.01, 0.1]
}

best_auc, best_params = -np.inf, {}

# Exhaustive search; each candidate is scored by AUC@10 on csr_test.
for params in ParameterGrid(param_grid):
    # the grid keys map one-to-one onto the ALS keyword arguments
    model = ALS(**params)
    model.fit(csr_train, show_progress=False)

    auc10 = AUC_at_k(model, csr_train, csr_test, K=10, num_threads=10, show_progress=False)

    # strict improvement only: on ties the earliest combination is kept
    if not auc10 > best_auc:
        continue
    best_auc, best_params = auc10, params

# Report the winning combination and its score
print("Best parameters:", best_params)
print("Best AUC:", best_auc)

Best parameters: {'alpha': 40, 'factors': 200, 'iterations': 10, 'regularization': 0.01}
Best AUC: 0.596295090513017


In [117]:
# Refit with the best grid-search parameters, then score the four ranking
# metrics at K=10 in a fixed order (AUC, precision, MAP, NDCG).
model = ALS(**best_params)
model.fit(csr_train)
auc10, prec10, map10, ndcg10 = (
    metric(model, csr_train, csr_test, K=10, num_threads=10)
    for metric in (AUC_at_k, precision_at_k, mean_average_precision_at_k, ndcg_at_k)
)

print(f"AUC@10: {auc10}; Prec@10: {prec10}; Map@10: {map10}; Ndcg@10: {ndcg10}")

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/11328 [00:00<?, ?it/s]

  0%|          | 0/11328 [00:00<?, ?it/s]

  0%|          | 0/11328 [00:00<?, ?it/s]

  0%|          | 0/11328 [00:00<?, ?it/s]

AUC@10: 0.5951473875594666; Prec@10: 0.19041313559322035; Map@10: 0.10683781163124378; Ndcg@10: 0.1266763173077515


## Recommending with ALS

In [43]:
def sku_count_2(df_pl):
    """Attach a ``sku_count`` column: distinct ``sku_id`` values per ``bcn_id``.

    The column is written onto ``df_pl`` itself and the same object is returned.
    """
    per_customer = df_pl.groupby('bcn_id')
    distinct_skus = per_customer['sku_id'].transform('nunique')
    df_pl['sku_count'] = distinct_skus
    return df_pl

In [44]:
grouped_df

Unnamed: 0,bcn_id,sku_id,Qty Shipped,purchase
0,0,925,1.0,1
1,0,3417,1.0,1
2,0,3418,4.0,2
3,0,3419,3.0,1
4,0,4184,4.0,1
...,...,...,...,...
541846,11326,33694,9.0,1
541847,11326,42224,2.0,1
541848,11326,45153,2.0,1
541849,11327,35982,25.0,1


In [45]:
df_2 = sku_count_2(grouped_df)

In [46]:
# Customers with 5-20 distinct SKUs (both bounds included) and customers with
# more than 20 distinct SKUs.
users_less_20 = df_2.loc[df_2["sku_count"].between(5, 20), "bcn_id"].unique()
users_more_20 = df_2.loc[df_2["sku_count"] > 20, "bcn_id"].unique()

In [47]:
sku_list = df[["sku_id", "Product Descr1", "ProductGroupDescription", "ProductGroupMasterDescription"]].drop_duplicates()
sku_list.head(1)

Unnamed: 0,sku_id,Product Descr1,ProductGroupDescription,ProductGroupMasterDescription
213,49730,DT PRINT ZQ320 KIT LABEL SENSOR,Mobile Receipt Printer,AIDC/PoS Printers


In [48]:
from google.colab import files

In [121]:
 # Pick one random customer from each bucket (5-20 SKUs / more than 20 SKUs)
import random
user_id = random.choice(users_less_20)
user_id_2 = random.choice(users_more_20)
print(user_id, user_id_2, sep="\n")

5865
5990


In [122]:
# Map the encoded ids back to the original customer numbers (first matching row).
user_id_bcn = df.loc[df['bcn_id'] == user_id, 'BranchCustomerNbr'].iloc[0]
user_id_2_bcn = df.loc[df['bcn_id'] == user_id_2, 'BranchCustomerNbr'].iloc[0]
print(user_id_bcn, user_id_2_bcn, sep="\n")

44508633
44510285


BCNs (customer numbers) to generate recommendations for:

44508633
44510285

In [123]:
# Purchase history of the first sampled customer: per-SKU totals, most frequently
# ordered first, enriched with product descriptions, then exported.
user_purchases = (
    df[df["BranchCustomerNbr"] == user_id_bcn]
    .groupby(["bcn_id", "sku_id"])
    .agg({"Qty Shipped": "sum", "purchase": "sum"})
    .reset_index()
    .sort_values(by="purchase", ascending=False)
    .merge(sku_list, on="sku_id", how="left")
)
user_purchases.to_csv(f'{user_id}_purchase_history.csv')
files.download(f'{user_id}_purchase_history.csv')
user_purchases

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,bcn_id,sku_id,Qty Shipped,purchase,Product Descr1,ProductGroupDescription,ProductGroupMasterDescription
0,5865,12016,50.0,1,4GB 2666MHZ DDR4 NON-ECC,Generic Memory,Memory and Processors
1,5865,26983,4.0,1,K243YBMIX 60.5CM (23.8IN) TFT,Led Small Format,Display
2,5865,33050,2.0,1,8GB DDR4-2666MHZ CL15 SODIMM,Generic Memory,Memory and Processors
3,5865,53566,8.0,1,WD PURPLE 4TB 256MB 3.5IN SATA,Hard Drive Sata,Hard Drives & Optical Drives
4,5865,57133,10.0,1,TX-2202A 54.6CM 21.5IN,Led Small Format,Display
5,5865,64636,1.0,1,128GB MSATA SSD,Solid State Drive (Ssd),Hard Drives & Optical Drives
6,5865,68737,6.0,1,PURE WINGS 2 80MM,Computer Cooling Systems & Fans,Computer Systems
7,5865,68787,10.0,1,SYSTEM POWER 9 700W CM,Power Supplies,Computer Systems


In [124]:
# Purchase history of the second sampled customer: per-SKU totals, most frequently
# ordered first, enriched with product descriptions, then exported.
user_purchases_2 = (
    df[df["BranchCustomerNbr"] == user_id_2_bcn]
    .groupby(["bcn_id", "sku_id"])
    .agg({"Qty Shipped": "sum", "purchase": "sum"})
    .reset_index()
    .sort_values(by="purchase", ascending=False)
    .merge(sku_list, on="sku_id", how="left")
)
user_purchases_2.to_csv(f'{user_id_2}_purchase_history.csv')
files.download(f'{user_id_2}_purchase_history.csv')
user_purchases_2

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,bcn_id,sku_id,Qty Shipped,purchase,Product Descr1,ProductGroupDescription,ProductGroupMasterDescription
0,5990,55137,245.0,53,PT-E550W BUNDLE,Other Printers,Printers/AIO/Copiers/Fax
1,5990,17717,793.0,39,TZE-231 LAMINATED TAPE 12MM 8M,Ribbon,Consumables
2,5990,17710,413.0,36,TZE-221 LAMINATED TAPE 9MM 8M,Ribbon,Consumables
3,5990,17722,172.0,21,TZE-241 LAMINATED TAPE 18MM 8M,Ribbon,Consumables
4,5990,48013,60.0,17,DISPLEX SMART GLASS SAMSUNG,Screen Protectors,Mobility
...,...,...,...,...,...,...,...
458,5990,25547,1.0,1,UNIVER.DUAL MONITOR CLAMP MOUNT,Display Mounting Kits,Display
459,5990,25459,2.0,1,OTTERBOX CAR CHARGER 18W USB C,Usb Cables & Adapters,Cables
460,5990,25319,1.0,1,WHITE CAT7 SFTP CABLE5M 16.4FT,Network Ethernet Cables & Adapters,Cables
461,5990,25317,1.0,1,BLUE CAT7 SFTP CABLE5M 16.4FT,Network Ethernet Cables & Adapters,Cables


## Recommendations

#### Model 1

In [125]:
# Now you can call the recommend function
userid = [user_id]
ids, scores = model.recommend(userid, csr_overall[userid], N=10, filter_already_liked_items=True)
ids, scores

rec_tab = pd.DataFrame(data=[ids[0],scores[0]])
rec_tab = rec_tab.T.rename(columns={0:"sku_id", 1:"score", 2:"sku_id", 3:"score"})
rec_tab = rec_tab.merge(sku_list, on="sku_id", how="left")
rec_tab.to_csv(f'{user_id}_ALS_QTY_REC.csv')
files.download(f'{user_id}_ALS_QTY_REC.csv')
rec_tab
# rec_tab.sort_values(by="conf", ascending=False)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,sku_id,score,Product Descr1,ProductGroupDescription,ProductGroupMasterDescription
0,49717.0,0.012249,1000G NV2 M.2 2280 NVME SSD NV2,Solid State Drive (Ssd),Hard Drives & Optical Drives
1,36986.0,0.011631,1000G FURY RENEGADE M.2 2280,Solid State Drive (Ssd),Hard Drives & Optical Drives
2,36983.0,0.011377,2048G KC3000 NVME M.2 SSD,Solid State Drive (Ssd),Hard Drives & Optical Drives
3,33052.0,0.011365,16GB DDR4-2666MHZ CL15 SODIMM,Generic Memory,Memory and Processors
4,12061.0,0.010687,128GB MICROSDXC CANVAS SELECT,Flash Memory,Memory and Processors
5,49718.0,0.010271,2000G NV2 M.2 2280 NVME SSD NV2,Solid State Drive (Ssd),Hard Drives & Optical Drives
6,11994.0,0.00993,120GB A400 SATA3 2.5 SSD 7MM,Solid State Drive (Ssd),Hard Drives & Optical Drives
7,32870.0,0.009863,32GB DDR4-2666MHZ CL16 DIMM,Generic Memory,Memory and Processors
8,33055.0,0.009785,32GB DDR4-2666MHZ CL16 SODIMM,Generic Memory,Memory and Processors
9,50403.0,0.009769,32GB DDR5-6000MT/S CL36 DIMM,Generic Memory,Memory and Processors


In [126]:
# Now you can call the recommend function
userid = [user_id_2]
ids, scores = model.recommend(userid, csr_overall[userid], N=10, filter_already_liked_items=True)
ids, scores

rec_tab = pd.DataFrame(data=[ids[0],scores[0]])
rec_tab = rec_tab.T.rename(columns={0:"sku_id", 1:"score", 2:"sku_id", 3:"score"})
rec_tab = rec_tab.merge(sku_list, on="sku_id", how="left")
rec_tab.to_csv(f'{user_id_2}_ALS_QTY_REC.csv')
files.download(f'{user_id_2}_ALS_QTY_REC.csv')
rec_tab
# rec_tab.sort_values(by="conf", ascending=False)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,sku_id,score,Product Descr1,ProductGroupDescription,ProductGroupMasterDescription
0,51011.0,0.87456,4-PORT USB HUB 5GBPS PORTABLE,Usb Hubs & Controllers,Communications & Networking
1,14980.0,0.794001,SAVI 8220 UC S8220 C D200 USB-A,Computer Headset,Multimedia & Games
2,42601.0,0.731894,LC-422 VALUE PACK INK F.,Ink,Consumables
3,65745.0,0.718481,V7 PRIVACY FILTER 23.8IN 16:09,Monitor Accessories,Display
4,17980.0,0.686593,LC-3219XLC INK CARTRIDGE CYAN,Ink,Consumables
5,17981.0,0.657574,LC-3219XLY INKCARTRIDGE YELLOW,Ink,Consumables
6,17808.0,0.65274,LC-127XLVALBPDR F. MFC-J4510DW,Ink,Consumables
7,14981.0,0.560734,SAVI 8220 UC S8220-M C D200,Computer Headset,Multimedia & Games
8,17742.0,0.544346,TZE-345 LAMINATED TAPE 18MM 8M,Ribbon,Consumables
9,54263.0,0.516779,BRIO 300 FULL HD WEBCAM,Webcamera,Multimedia & Games


# Test LibRecommender

In [96]:
#!pip install LibRecommender

In [97]:
import os
import datetime
import zipfile

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

In [98]:
from libreco.data import split_by_num, random_split
from libreco.data import DatasetFeat

In [99]:
df_3 = df_2.copy()

In [100]:
df_2 = df_2.rename(columns={"bcn_id":"user", "sku_id":"item", "purchase":"label"})

In [101]:
# Random 80/10/10 split of the (user, item, label) triples into three parts.
# NOTE(review): the second part is bound to `test` and the third to `eval`;
# LibRecommender's own examples appear to unpack as train, eval, test - confirm
# this order is intended. `eval` also rebinds the Python builtin of that name.
train, test, eval = random_split(df_2[["user", "item", "label"]], multi_ratios=[0.8,0.1,0.1])

In [102]:
train, data_info = DatasetFeat.build_trainset(train)
eval = DatasetFeat.build_evalset(eval)
test = DatasetFeat.build_testset(test)

In [103]:
from libreco.algorithms import ALS as ALS_lib
from libreco.evaluation import evaluate
from libreco.data import random_split, DatasetPure

Instructions for updating:
non-resource variables are not supported in the long term


In [104]:
metrics = [
        "loss",
        "balanced_accuracy",
        "roc_auc",
        "precision",
        "recall",
        "map",
        "ndcg",
    ]

In [105]:
model = ALS_lib(
    "ranking",
    data_info=data_info,
    embed_size=16,
    n_epochs=10,
    reg=0.01,
    alpha=20,
    n_threads=10,
)

In [106]:
model.fit(train_data=train,
          neg_sampling=True,
          verbose=1,
          shuffle=False,
          eval_data=eval,
          metrics=metrics,
          n_neg=100
          )

Training start time: [35m2023-08-12 21:40:42[0m
Epoch 1 elapsed: 0.943s
Epoch 2 elapsed: 1.011s
Epoch 3 elapsed: 0.997s
Epoch 4 elapsed: 0.964s
Epoch 5 elapsed: 4.728s
Epoch 6 elapsed: 1.213s
Epoch 7 elapsed: 0.960s
Epoch 8 elapsed: 0.952s
Epoch 9 elapsed: 0.923s
Epoch 10 elapsed: 0.988s


In [107]:
eval_result = evaluate(model=model,
        data=test,
        neg_sampling=True,
        eval_batch_size=2048,
        k=10,
        metrics=metrics)
eval_result

eval_pointwise: 100%|██████████| 50/50 [00:00<00:00, 1590.73it/s]
eval_listwise: 100%|██████████| 6365/6365 [00:15<00:00, 407.81it/s]


{'loss': 0.5952633752482153,
 'balanced_accuracy': 0.6855551134102666,
 'roc_auc': 0.8926305560864642,
 'precision': 0.05025923016496465,
 'recall': 0.08798001336038111,
 'map': 0.10425864470726097,
 'ndcg': 0.1372756319639581}

In [108]:
eval_result = evaluate(model=model,
        data=eval,
        neg_sampling=True,
        eval_batch_size=2048,
        k=10,
        metrics=metrics)
eval_result

eval_pointwise: 100%|██████████| 50/50 [00:00<00:00, 1587.20it/s]
eval_listwise: 100%|██████████| 6378/6378 [00:14<00:00, 426.15it/s]


{'loss': 0.5954522432609299,
 'balanced_accuracy': 0.685174858599538,
 'roc_auc': 0.8935012813257226,
 'precision': 0.04940420194418312,
 'recall': 0.08603109104599853,
 'map': 0.10238116882056904,
 'ndcg': 0.13456196635015955}