## Тема: 1. Введение, примеры задач, бизнес- и ML-метрики

In [1]:
import numpy as np
import pandas as pd

In [2]:
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43]
prices_recommended_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
bought_list = [521, 32, 143, 991]

### Hit rate@k

In [3]:
def hit_rate_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    flags = np.isin(bought_list, recommended_list)
    
    hit_rate = (flags.sum() > 0).astype(int)
    
    return hit_rate

In [4]:
hit_rate_at_k(recommended_list, bought_list, k=4)

1

### Money Precision@k

In [5]:
def money_precision_at_k(recommended_list, bought_list, prices_recommended_list, k=5):
        
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    prices_recommended_list = np.array(prices_recommended_list)[:k]
    
    flags = np.isin(recommended_list, bought_list)

    sum_flags = np.dot(flags, prices_recommended_list)
    
    precision = sum_flags / np.sum(prices_recommended_list)
    
    return precision

In [6]:
money_precision_at_k(recommended_list, bought_list, prices_recommended_list, k=4)

0.5

### Recall@k

In [7]:
def recall_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    flags = np.isin(bought_list, recommended_list)
    
    recall = flags.sum() / len(bought_list)
    
    return recall

In [8]:
recall_at_k(recommended_list, bought_list, k=4)

0.5

### Money Recall@k

In [9]:
def money_recall_at_k(recommended_list, bought_list, prices_recommended_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    prices_recommended_list = np.array(prices_recommended_list)[:k]
    
    flags = np.isin(recommended_list, bought_list)
    
    sum_flags = np.dot(flags, prices_recommended_list)
    
    recall = sum_flags / len(bought_list)
    
    return recall

In [10]:
money_recall_at_k(recommended_list, bought_list, prices_recommended_list, k=4)

12.5

### MAP@k

In [11]:
bought_list1 = [521, 156, 143, 991]
bought_list2 = [143, 558, 11, 84]
bought_list3 = [1543, 533, 11, 27]
bought_list_all = [bought_list1, bought_list2, bought_list3]
bought_list_all

[[521, 156, 143, 991], [143, 558, 11, 84], [1543, 533, 11, 27]]

In [12]:
def precision_at_k(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]
    
    flags = np.isin(bought_list, recommended_list)
    
    precision = flags.sum() / len(recommended_list)
    
    return precision

def map_k(recommended_list, bought_list_all, k=5):
    
    recommended_list = np.array(recommended_list)[:k]
    all_users = len(bought_list_all)
    sum_all = 0
    
    for i in range(0, all_users):
        bought_list = np.array(bought_list_all[i])
        flags = np.isin(recommended_list, bought_list)

        sum_ = 0
        for j in range(0, k):
            if flags[j] == True:
                p_k = precision_at_k(recommended_list, bought_list, k=j+1)
                sum_ += p_k
        sum_all += sum_ / k
    
    return sum_all / all_users

In [13]:
map_k(recommended_list, bought_list_all, k=4)

0.3125

### \* NDCG@k 

In [14]:
def dcg_at_k(recommended_list, bought_list_all, k=5):
    
    recommended_list = np.array(recommended_list)[:k]
    all_users = len(bought_list_all)
    sum_all = 0
    
    for i in range(0, all_users):
        bought_list = np.array(bought_list_all[i])
        flags = np.isin(recommended_list, bought_list)

        sum_ = 0
        for i in range(1, k+1):
            if i == 1 and flags[i-1] == True:
                sum_ += 1
            elif i != 1 and flags[i-1] == True:
                sum_ += 1/np.log2(i+1)

        sum_all += sum_ / k
           
    return sum_all / all_users

In [15]:
dcg_at_k(recommended_list, bought_list_all, k=7)

0.1811745859994007

In [16]:
def ideal_dcg_at_k(k=5):
    
    if k == 1: 
        return 1
    
    sum_ = 1
    for i in range(2, k+1):
        sum_ += 1/np.log2(i+1)
           
    return sum_ / k   

In [17]:
ideal_dcg_at_k(k=7)

0.5197142341886783

In [18]:
def ndcg_at_k(recommended_list, bought_list_all, k=5):
    
    dcg = dcg_at_k(recommended_list, bought_list_all, k)
    ideal_dcg = ideal_dcg_at_k(k)

    return dcg / ideal_dcg

In [19]:
ndcg_at_k(recommended_list, bought_list_all, k=7)

0.34860424071746055

### MRR@k

In [20]:
def reciprocal_rank_at_k(recommended_list, bought_list, k=5):
    
    ranks = 0
    for item_rec in recommended_list[:k]:
        for i, item_bought in enumerate(bought_list):
            if item_rec == item_bought:
                ranks += 1 / (i+1)
                
    return ranks / k

In [21]:
reciprocal_rank_at_k(recommended_list, bought_list, k=4)

0.14583333333333331

### \* AP для ранжирования по примеру

In [22]:
recommended_list

[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43]

In [23]:
bought_list_example = [143, 151, 1134, 991, 270, 1543, 345, 535] 

In [24]:
def ap_k_example(recommended_list, bought_list, k=5):
    
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    
    flags = np.isin(recommended_list, bought_list)

    if sum(flags) == 0:
        return 0
    
    sum_ = 0
    num_true = 0
    for i in range(0, k-1): 
        if flags[i] == True:
            num_true += 1
            sum_ += round(num_true/(i+1), 2)
        print(f'{num_true} - {sum_}')

    result = sum_ / sum(flags)
    
    return round(result, 2)

In [25]:
ap_k_example(recommended_list, bought_list_example, k=8)

1 - 1.0
1 - 1.0
2 - 1.67
3 - 2.42
3 - 2.42
4 - 3.09
4 - 3.09


0.77