Примеры метрик для оценки рекомендаций/ранжирования:

<b>Метрики на основе ранговой корреляции</b><br>
    Ранговый коэффициент корреляции Кендэлла — основан на подсчете согласованных
    (и несогласованных) пар у перестановок — пар элементов, котором перестановки присвоили одинаковый (разный) порядок<br>
    Ранговый коэффициент корреляции Спирмена — корреляция Пирсона, посчитанной на значениях рангов.

<b>Метрики на основе каскадной модели поведения</b><br>
    Expected reciprocal rank — пример метрики качества ранжирования, основанной на каскадной модели<br>
    PFound — метрика качества ранжирования, предложенная нашими соотечественниками и использующая похожую на каскадную модель

In [1]:
import pandas as pd
import numpy as np

recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43] 
bought_list = [521, 43, 143, 991]
prices_recommended = [400, 60, 40, 40 , 90, 5, 100, 50, 600, 300]

In [2]:
def hit_rate(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    return (flags.sum() > 0) * 1

#Hit rate@k = (был ли хотя бы 1 релевантный товар среди топ-k рекомендованных)
def hit_rate_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    flags = np.isin(bought_list, recommended_list)
    return (flags.sum() > 0) * 1

In [3]:
hit_rate(recommended_list, bought_list)

1

In [4]:
hit_rate_at_k(recommended_list, bought_list)

1

In [5]:
def precision(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(recommended_list)

#Precision@k = (# of recommended items @k that are relevant) / (# of recommended items @k)
def precision_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(recommended_list)

def money_precision_at_k_(recommended_list, bought_list, prices_recommended, k=5):
    
    recommend_list = np.array(recommended_list)[:k]
    prices_recommended = np.array(prices_recommended)[:k]
    
    flags = np.isin(recommend_list, bought_list)
    
    precision = np.dot(flags, prices_recommended).sum() / prices_recommended.sum()
    
    return precision

In [6]:
precision(recommended_list, bought_list)

0.3

In [7]:
precision_at_k(recommended_list, bought_list)

0.4

In [8]:
money_precision_at_k_(recommended_list, bought_list, prices_recommended)

0.6984126984126984

In [9]:
def recall(recommended_list, bought_list):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(bought_list)
    
#Recall@k = (# of recommended items @k that are relevant) / (# of relevant items)
def recall_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    flags = np.isin(bought_list, recommended_list)
    return flags.sum() / len(bought_list)

#Money Recall@k = (revenue of recommended items @k that are relevant) / (revenue of relevant items)
#параметр prices_bought - убрал, так как считаю внутри def
def money_recall_at_k(recommended_list, bought_list, prices_recommended, k=5):
   
    flags = np.isin(recommended_list, bought_list)
    
    recommend_list_k = np.array(recommended_list)[:k]
    prices_recommended_k = np.array(prices_recommended)[:k]
    
    flags_k = np.isin(recommend_list_k, bought_list)
    
    precision = np.dot(flags_k, prices_recommended_k).sum() / np.dot(flags, prices_recommended).sum()
    
    return precision

In [10]:
recall(recommended_list, bought_list)

0.75

In [11]:
recall_at_k(recommended_list, bought_list)

0.5

In [12]:
money_recall_at_k(recommended_list, bought_list, prices_recommended)

0.5945945945945946

In [13]:
recommended_list = [4,22,21,1,234,234,234,666] #id товаров
bought_list = [1,221,3,4,5,6,7,8,9]

In [14]:
#AP@k
def ap_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    relevant_indexes = np.nonzero(np.isin(recommended_list, bought_list))[0]
    if len(relevant_indexes) == 0:
        return 0
    
    amount_relevant = len(relevant_indexes)
            
    sum_ = sum([precision_at_k(recommended_list, bought_list, k=index_relevant+1) for index_relevant in relevant_indexes])
    return sum_/amount_relevant

In [15]:
ap_k(recommended_list, bought_list, k=5)

0.75

In [16]:
#MAP@k
# теперь список из 3 пользователей
recommended_list_3_users = [[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43], 
                    [1134, 533, 14, 4, 15, 1543, 1, 99, 27, 3345],
                    [991, 3345, 27, 533, 43, 143, 1543, 156, 1134, 11]
                    ]

bought_list_3_users = [[521, 32, 143],  # юзер 1
                       [143, 156, 991, 43, 11], # юзер 2
                       [1,2]] # юзер 3

In [17]:
def map_k(recommended_list, bought_list, k=5):
    
    sum_list = [0]
    
    for i, val in enumerate(bought_list):
        
        bought_list_i = np.array(val)
        recommended_list_i = np.array(recommended_list[i])[:k]

        relevant_indexes = np.nonzero(np.isin(recommended_list_i, bought_list_i))[0]
        if len(relevant_indexes) == 0:
            sum_list.append(0)
            continue

        amount_relevant = len(relevant_indexes)

        sum_list.append(sum([precision_at_k(recommended_list_i, bought_list_i, k=index_relevant+1) for index_relevant in relevant_indexes]))
     
    return sum(sum_list)/(i+1) 

In [18]:
map_k(recommended_list_3_users, bought_list_3_users)

0.3333333333333333

In [19]:
#AUC@k
# по желанию
def ndcg_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    flags = np.isin(bought_list, recommended_list)
    
    dcg = 0
    dcg_i = 0
    
    for i, val in enumerate(flags):
        
        if i < 2:
            dcg += val/(i+1)
            dcg_i += 1/(i+1)
        else:
            dcg += val/np.log(i+1)
            dcg_i += 1/np.log(i+1)
    
    return (dcg/k)/(dcg_i/k)


In [20]:
ndcg_at_k(recommended_list, bought_list)

0.2987958471299362

In [21]:
#MRR@k
def reciprocal_rank(recommended_list, bought_list, k=1):
    
    for i, val in enumerate(recommended_list[:k]):
        if val in bought_list:
            break
    
    return 1/(i+1)
    

In [22]:
reciprocal_rank(recommended_list, bought_list)

1.0